Vendor import of llvm trunk r130700:

http://llvm.org/svn/llvm-project/llvm/trunk@130700
author: dim <dim@FreeBSD.org> 2011-05-02 19:34:44 +0000
committer: dim <dim@FreeBSD.org> 2011-05-02 19:34:44 +0000
commit: 2b066988909948dc3d53d01760bc2d71d32f3feb (patch)
tree: fc5f365fb9035b2d0c622bbf06c9bbe8627d7279
parent: c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc (diff)
download: FreeBSD-src-2b066988909948dc3d53d01760bc2d71d32f3feb.zip
FreeBSD-src-2b066988909948dc3d53d01760bc2d71d32f3feb.tar.gz
1574 files changed, 61326 insertions, 32241 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b357478..e0404cf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,7 +10,7 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
   )
 
-set(PACKAGE_VERSION "2.9")
+set(PACKAGE_VERSION "3.0")
 
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
@@ -81,6 +81,9 @@ set(LLVM_ALL_TARGETS
   XCore
   )
 
+# List of targets with JIT support:
+set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM)
+
 if( MSVC )
   set(LLVM_TARGETS_TO_BUILD X86
     CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
@@ -135,9 +138,15 @@ include(AddLLVMDefinitions)
 
 option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
 
-include(config-ix)
+# MSVC has a gazillion warnings with this.
+if( MSVC )
+  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF)
+else( MSVC )
+  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
+endif()
 
-include(HandleLLVMOptions)
+option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
+option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
 
 if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
   option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF)
@@ -145,6 +154,12 @@ else()
   option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
 endif()
 
+# All options referred to from HandleLLVMOptions have to be specified
+# BEFORE this include, otherwise options will not be correctly set on
+# first cmake run
+include(config-ix)
+include(HandleLLVMOptions)
+
 configure_file(
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake
   ${LLVM_BINARY_DIR}/include/llvm/Config/config.h)
@@ -161,16 +176,6 @@ set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR} )
 set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib )
 set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib )
 
-# MSVC has a gazillion warnings with this.
-if( MSVC )
-  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF)
-else( MSVC )
-  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
-endif()
-
-option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
-option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
-
 set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
 include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
@@ -183,15 +188,6 @@ include(AddLLVM)
 include(TableGen)
 
 if( MINGW )
-  get_system_libs(LLVM_SYSTEM_LIBS_LIST)
-  foreach(l ${LLVM_SYSTEM_LIBS_LIST})
-    set(LLVM_SYSTEM_LIBS "${LLVM_SYSTEM_LIBS} -l${l}")
-  endforeach()
-  set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}")
-  set(CMAKE_C_STANDARD_LIBRARIES "${CMAKE_C_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}")
-endif()
-
-if( MINGW )
   # People report that -O3 is unreliable on MinGW. The traditional
   # build also uses -O2 for that reason:
   llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2")
@@ -231,6 +227,13 @@ if( LLVM_INCLUDE_TOOLS )
   add_subdirectory(tools)
 endif()
 
+option(LLVM_BUILD_RUNTIME
+  "Build the LLVM runtime libraries. If OFF, just generate build targets." ON)
+option(LLVM_INCLUDE_RUNTIME "Generate build targets for the LLVM runtimes" ON)
+if( LLVM_INCLUDE_RUNTIME )
+  add_subdirectory(runtime)
+endif()
+
 option(LLVM_BUILD_EXAMPLES
   "Build the LLVM example programs. If OFF, just generate build targets." OFF)
 option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
@@ -239,7 +242,7 @@ if( LLVM_INCLUDE_EXAMPLES )
 endif()
 
 option(LLVM_BUILD_TESTS
-  "Build LLVM unit tests. If OFF, just generate build targes." OFF)
+  "Build LLVM unit tests. If OFF, just generate build targets." OFF)
 if( LLVM_INCLUDE_TESTS )
   add_subdirectory(test)
   add_subdirectory(utils/unittest)
@@ -260,6 +263,7 @@ install(DIRECTORY include/
   PATTERN "*.h"
   PATTERN "*.td"
   PATTERN "*.inc"
+  PATTERN "LICENSE.TXT"
   PATTERN ".svn" EXCLUDE
   )
 
diff --git a/Makefile b/Makefile
index dbb759d..7dad07b 100644
--- a/Makefile
+++ b/Makefile
@@ -168,6 +168,15 @@ install-clang: install
 install-clang-c: install
 install-libs: install
 
+# If SHOW_DIAGNOSTICS is enabled, clear the diagnostics file first.
+ifeq ($(SHOW_DIAGNOSTICS),1)
+clean-diagnostics:
+	$(Verb) rm -f $(LLVM_OBJ_ROOT)/$(BuildMode)/diags
+.PHONY: clean-diagnostics
+
+all-local:: clean-diagnostics
+endif
+
 #------------------------------------------------------------------------
 # Make sure the generated headers are up-to-date. This must be kept in
 # sync with the AC_CONFIG_HEADER invocations in autoconf/configure.ac
@@ -198,6 +207,12 @@ ifneq ($(ENABLE_OPTIMIZED),1)
 	$(Echo) '*****' optimized build. Use 'make ENABLE_OPTIMIZED=1' to
 	$(Echo) '*****' make an optimized build. Alternatively you can
 	$(Echo) '*****' configure with --enable-optimized.
+ifeq ($(SHOW_DIAGNOSTICS),1)
+	$(Verb) if test -s $(LLVM_OBJ_ROOT)/$(BuildMode)/diags; then \
+	  $(LLVM_SRC_ROOT)/utils/show-diagnostics \
+	    $(LLVM_OBJ_ROOT)/$(BuildMode)/diags; \
+	fi
+endif
 endif
 endif
 
diff --git a/Makefile.rules b/Makefile.rules
index c0a9112..71d4307 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -444,11 +444,11 @@ endif
 # LLVM Capable Compiler
 #--------------------------------------------------------------------
 
-ifeq ($(LLVMCC_OPTION),llvm-gcc)
+ifneq ($(findstring llvm-gcc,$(LLVMCC_OPTION)),)
   LLVMCC := $(LLVMGCC)
   LLVMCXX := $(LLVMGXX)
 else
-  ifeq ($(LLVMCC_OPTION),clang)
+  ifneq ($(findstring clang,$(LLVMCC_OPTION)),)
     ifneq ($(CLANGPATH),)
       LLVMCC := $(CLANGPATH)
       LLVMCXX := $(CLANGXXPATH)
@@ -646,26 +646,42 @@ CPP.Flags     += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
 	         $(LLVM_OBJ_ROOT) $(LLVM_SRC_ROOT))) \
 	         $(CPP.BaseFlags)
 
+# SHOW_DIAGNOSTICS support.
+ifeq ($(SHOW_DIAGNOSTICS),1)
+  Compile.Wrapper := env CC_LOG_DIAGNOSTICS=1 \
+	                  CC_LOG_DIAGNOSTICS_FILE="$(LLVM_OBJ_ROOT)/$(BuildMode)/diags"
+else
+  Compile.Wrapper :=
+endif
+
 ifeq ($(BUILD_COMPONENT), 1)
-  Compile.C     = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+  Compile.C     = $(Compile.Wrapper) \
+	          $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
                   $(TargetCommonOpts) $(CompileCommonOpts) -c
-  Compile.CXX   = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+  Compile.CXX   = $(Compile.Wrapper) \
+	          $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
 		  $(CPPFLAGS) \
                   $(TargetCommonOpts) $(CompileCommonOpts) -c
-  Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
+  Preprocess.CXX= $(Compile.Wrapper) \
+	          $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
                   $(CompileCommonOpts) $(CXX.Flags) -E
-  Link          = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
-		  $(LDFLAGS) \
-                  $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip)
+  Link          = $(Compile.Wrapper) \
+	          $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+		  $(LD.Flags) $(LDFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
 else
-  Compile.C     = $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+  Compile.C     = $(Compile.Wrapper) \
+	          $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
                   $(TargetCommonOpts) $(CompileCommonOpts) -c
-  Compile.CXX   = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
+  Compile.CXX   = $(Compile.Wrapper) \
+	          $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
                   $(TargetCommonOpts) $(CompileCommonOpts) -c
-  Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
+  Preprocess.CXX= $(Compile.Wrapper) \
+	          $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
                   $(CompileCommonOpts) $(CXX.Flags) -E
-  Link          = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LDFLAGS) \
-                  $(TargetCommonOpts)  $(CompileCommonOpts) $(LD.Flags) $(Strip)
+  Link          = $(Compile.Wrapper) \
+	          $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LD.Flags) \
+                  $(LDFLAGS) $(TargetCommonOpts)  $(CompileCommonOpts) $(Strip)
 endif
 
 BCCompile.C   = $(LLVMCC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
@@ -719,6 +735,24 @@ BaseNameSources := $(sort $(basename $(Sources)))
 ObjectsO  := $(BaseNameSources:%=$(ObjDir)/%.o)
 ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
 
+#----------------------------------------------------------
+# For Mingw MSYS bash and Python/w32:
+#
+# $(ECHOPATH) prints DOSish pathstring.
+#   ex) $(ECHOPATH) /include/sys/types.h
+#   --> C:/mingw/include/sys/types.h
+# built-in "echo" does not transform path to DOSish path.
+#
+# FIXME: It would not be needed when MSYS's python
+# were provided.
+#----------------------------------------------------------
+
+ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE)))
+  ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])"
+else
+  ECHOPATH := $(Verb)$(ECHO)
+endif
+
 ###############################################################################
 # DIRECTORIES: Handle recursive descent of directory structure
 ###############################################################################
@@ -1510,31 +1544,31 @@ BC_DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.bc.d.tmp" "$(ObjDir)/$*.bc.d";
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
+			$< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
+			$< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
+			$< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
-			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
+			$< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
-			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
+			$< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 # Provide alternate rule sets if dependencies are disabled
@@ -1562,23 +1596,23 @@ $(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
 
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
+	$(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
+	$(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
+	$(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
-	$(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
+	$(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
-	$(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
+	$(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
 
 endif
 
@@ -1990,7 +2024,7 @@ $(DistZip) : $(TopDistDir)/.makedistdir
 	$(Verb) cd $(PROJ_OBJ_ROOT) ; $(ZIP) -rq $(DistZip) $(DistName)
 
 dist :: $(DistTarGZip) $(DistTarBZ2) $(DistZip)
-	$(Echo) ===== DISTRIBUTION PACKAGING SUCESSFUL =====
+	$(Echo) ===== DISTRIBUTION PACKAGING SUCCESSFUL =====
 
 DistCheckDir := $(PROJ_OBJ_ROOT)/_distcheckdir
 
@@ -2139,8 +2173,13 @@ install-local::
 	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir)
 	$(Verb) if test -d "$(PROJ_SRC_ROOT)/include" ; then \
 	  cd $(PROJ_SRC_ROOT)/include && \
-	  for  hdr in `find . -type f '!' '(' -name '*~' \
-	      -o -name '.#*' -o -name '*.in' ')' -print | grep -v CVS | \
+	  for hdr in `find . -type f \
+	      '(' -name LICENSE.TXT \
+	       -o -name '*.def' \
+	       -o -name '*.h' \
+	       -o -name '*.inc' \
+	       -o -name '*.td' \
+	      ')' -print | grep -v CVS | \
 	      grep -v .svn` ; do \
 	    instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \
 	    if test \! -d "$$instdir" ; then \
@@ -2153,7 +2192,19 @@ install-local::
 ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
 	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/include" ; then \
 	  cd $(PROJ_OBJ_ROOT)/include && \
-	  for hdr in `find . -type f -print | grep -v CVS` ; do \
+	  for hdr in `find . -type f \
+	      '(' -name LICENSE.TXT \
+	       -o -name '*.def' \
+	       -o -name '*.h' \
+	       -o -name '*.inc' \
+	       -o -name '*.td' \
+	      ')' -print | grep -v CVS | \
+	      grep -v .svn` ; do \
+	    instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \
+	    if test \! -d "$$instdir" ; then \
+	      $(EchoCmd) Making install directory $$instdir ; \
+	      $(MKDIR) $$instdir ;\
+	    fi ; \
 	    $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \
 	  done ; \
 	fi
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 9259633..b55f564 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,7 +31,7 @@ dnl===
 dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[2.9svn]],[llvmbugs@cs.uiuc.edu])
+AC_INIT([[llvm]],[[3.0svn]],[llvmbugs@cs.uiuc.edu])
 
 dnl Provide a copyright substitution and ensure the copyright notice is included
 dnl in the output of --version option of the generated configure script.
@@ -657,12 +657,19 @@ for a_target in $TARGETS_TO_BUILD; do
     LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
     LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
     LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+    if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+      LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
+    fi
     AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET,
       [LLVM name for the native Target init function, if available])
     AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO,
       [LLVM name for the native TargetInfo init function, if available])
     AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER,
       [LLVM name for the native AsmPrinter init function, if available])
+    if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+      AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPARSER, $LLVM_NATIVE_ASMPARSER,
+       [LLVM name for the native AsmParser init function, if available])
+    fi
   fi
 done
 
@@ -1422,6 +1429,24 @@ if test "$llvm_cv_os_type" = "MingW" ; then
   AC_CHECK_LIB(gcc,__cmpdi2,AC_DEFINE([HAVE___CMPDI2],[1],[Have host's __cmpdi2]))
 fi
 
+dnl Check Win32 API EnumerateLoadedModules.
+if test "$llvm_cv_os_type" = "MingW" ; then
+  AC_MSG_CHECKING([whether EnumerateLoadedModules() accepts new decl])
+  AC_COMPILE_IFELSE([[#include <windows.h>
+#include <imagehlp.h>
+extern void foo(PENUMLOADED_MODULES_CALLBACK);
+extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));]],
+[
+  AC_MSG_RESULT([yes])
+  llvm_cv_win32_elmcb_pcstr="PCSTR"
+],
+[
+  AC_MSG_RESULT([no])
+  llvm_cv_win32_elmcb_pcstr="PSTR"
+])
+  AC_DEFINE_UNQUOTED([WIN32_ELMCB_PCSTR],$llvm_cv_win32_elmcb_pcstr,[Type of 1st arg on ELM Callback])
+fi
+
 dnl Check for variations in the Standard C++ library and STL. These macros are
 dnl provided by LLVM in the autoconf/m4 directory.
 AC_FUNC_ISNAN
diff --git a/autoconf/m4/libtool.m4 b/autoconf/m4/libtool.m4
index a8b5e6a..e89738c 100644
--- a/autoconf/m4/libtool.m4
+++ b/autoconf/m4/libtool.m4
@@ -1118,7 +1118,7 @@ if test -n "$_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)" || \
    test -n "$_LT_AC_TAGVAR(runpath_var, $1)" || \
    test "X$_LT_AC_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then
 
-  # We can hardcode non-existant directories.
+  # We can hardcode non-existent directories.
   if test "$_LT_AC_TAGVAR(hardcode_direct, $1)" != no &&
      # If the only mechanism to avoid hardcoding is shlibpath_var, we
      # have to relink, otherwise we might link with an installed library
diff --git a/autoconf/m4/ltdl.m4 b/autoconf/m4/ltdl.m4
index bc9e2ad..407a16e 100644
--- a/autoconf/m4/ltdl.m4
+++ b/autoconf/m4/ltdl.m4
@@ -156,7 +156,7 @@ AC_CACHE_CHECK([whether deplibs are loaded by dlopen],
   osf[[1234]]*)
     # dlopen did load deplibs (at least at 4.x), but until the 5.x series,
     # it did *not* use an RPATH in a shared library to find objects the
-    # library depends on, so we explictly say `no'.
+    # library depends on, so we explicitly say `no'.
     libltdl_cv_sys_dlopen_deplibs=no
     ;;
   osf5.0|osf5.0a|osf5.1)
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index c2fe431..c1b22d4 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -224,6 +224,7 @@ check_type_exists(error_t errno.h HAVE_ERROR_T)
 # available programs checks
 function(llvm_find_program name)
   string(TOUPPER ${name} NAME)
+  string(REGEX REPLACE "\\." "_" NAME ${NAME})
   find_program(LLVM_PATH_${NAME} ${name})
   mark_as_advanced(LLVM_PATH_${NAME})
   if(LLVM_PATH_${NAME})
@@ -241,6 +242,7 @@ llvm_find_program(neato)
 llvm_find_program(fdp)
 llvm_find_program(dot)
 llvm_find_program(dotty)
+llvm_find_program(xdot.py)
 
 if( LLVM_ENABLE_FFI )
   find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR})
@@ -269,6 +271,10 @@ if( LLVM_ENABLE_FFI )
   check_symbol_exists(ffi_call ${FFI_HEADER} HAVE_FFI_CALL)
   list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH})
   list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH})
+else()
+  unset(HAVE_FFI_FFI_H CACHE)
+  unset(HAVE_FFI_H CACHE)
+  unset(HAVE_FFI_CALL CACHE)
 endif( LLVM_ENABLE_FFI )
 
 # Define LLVM_MULTITHREADED if gcc atomic builtins exists.
@@ -319,24 +325,19 @@ elseif (LLVM_NATIVE_ARCH MATCHES "xcore")
 elseif (LLVM_NATIVE_ARCH MATCHES "msp430")
   set(LLVM_NATIVE_ARCH MSP430)
 else ()
-  message(STATUS
-    "Unknown architecture ${LLVM_NATIVE_ARCH}; lli will not JIT code")
-  set(LLVM_NATIVE_ARCH)
+  message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
 endif ()
 
-if (LLVM_NATIVE_ARCH)
-  list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX)
-  if (NATIVE_ARCH_IDX EQUAL -1)
-    message(STATUS
-      "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code")
-    set(LLVM_NATIVE_ARCH)
-  else ()
-    message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}")
-    set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target)
-    set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
-    set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
-  endif ()
-endif()
+list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX)
+if (NATIVE_ARCH_IDX EQUAL -1)
+  message(STATUS
+    "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code")
+else ()
+  message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}")
+  set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target)
+  set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
+  set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
+endif ()
 
 if( MINGW )
   set(HAVE_LIBIMAGEHLP 1)
@@ -365,6 +366,21 @@ else( MSVC )
   set(LTDL_DLOPEN_DEPLIBS 0)  # TODO
 endif( MSVC )
 
+if( PURE_WINDOWS )
+  CHECK_CXX_SOURCE_COMPILES("
+    #include <windows.h>
+    #include <imagehlp.h>
+    extern \"C\" void foo(PENUMLOADED_MODULES_CALLBACK);
+    extern \"C\" void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+    int main(){return 0;}"
+    HAVE_ELMCB_PCSTR)
+  if( HAVE_ELMCB_PCSTR )
+    set(WIN32_ELMCB_PCSTR "PCSTR")
+  else()
+    set(WIN32_ELMCB_PCSTR "PSTR")
+  endif()
+endif( PURE_WINDOWS )
+
 # FIXME: Signal handler return type, currently hardcoded to 'void'
 set(RETSIGTYPE void)
 
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 764c659..c13143b 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -1,5 +1,5 @@
 include(LLVMProcessSources)
-include(LLVMConfig)
+include(LLVM-Config)
 
 macro(add_llvm_library name)
   llvm_process_sources( ALL_FILES ${ARGN} )
@@ -10,13 +10,20 @@ macro(add_llvm_library name)
   endif( LLVM_COMMON_DEPENDS )
 
   if( BUILD_SHARED_LIBS )
-    get_system_libs(sl)
-    target_link_libraries( ${name} ${sl} )
+    llvm_config( ${name} ${LLVM_LINK_COMPONENTS} )
   endif()
 
-  install(TARGETS ${name}
-    LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
-    ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+  # Ensure that the system libraries always comes last on the
+  # list. Without this, linking the unit tests on MinGW fails.
+  link_system_libs( ${name} )
+
+  if( EXCLUDE_FROM_ALL )
+    set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON)
+  else()
+    install(TARGETS ${name}
+      LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+      ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+  endif()
   # The LLVM Target library shall be built before its sublibraries
   # (asmprinter, etc) because those may use tablegenned files which
   # generation is triggered by the main LLVM target library. Necessary
@@ -45,15 +52,22 @@ ${name} ignored.")
     add_library( ${name} ${libkind} ${ALL_FILES} )
     set_target_properties( ${name} PROPERTIES PREFIX "" )
 
+    llvm_config( ${name} ${LLVM_LINK_COMPONENTS} )
+    link_system_libs( ${name} )
+
     if (APPLE)
       # Darwin-specific linker flags for loadable modules.
       set_target_properties(${name} PROPERTIES
         LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
     endif()
 
-    install(TARGETS ${name}
-      LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
-      ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+    if( EXCLUDE_FROM_ALL )
+      set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON)
+    else()
+      install(TARGETS ${name}
+	LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+	ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+    endif()
   endif()
 
   set_target_properties(${name} PROPERTIES FOLDER "Loadable modules")
@@ -68,23 +82,12 @@ macro(add_llvm_executable name)
     add_executable(${name} ${ALL_FILES})
   endif()
   set(EXCLUDE_FROM_ALL OFF)
-  if( LLVM_USED_LIBS )
-    foreach(lib ${LLVM_USED_LIBS})
-      target_link_libraries( ${name} ${lib} )
-    endforeach(lib)
-  endif( LLVM_USED_LIBS )
-  if( LLVM_LINK_COMPONENTS )
-    llvm_config(${name} ${LLVM_LINK_COMPONENTS})
-  endif( LLVM_LINK_COMPONENTS )
+  target_link_libraries( ${name} ${LLVM_USED_LIBS} )
+  llvm_config( ${name} ${LLVM_LINK_COMPONENTS} )
   if( LLVM_COMMON_DEPENDS )
     add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
   endif( LLVM_COMMON_DEPENDS )
-  if( NOT MINGW )
-    get_system_libs(llvm_system_libs)
-    if( llvm_system_libs )
-      target_link_libraries(${name} ${llvm_system_libs})
-    endif()
-  endif()
+  link_system_libs( ${name} )
 endmacro(add_llvm_executable name)
 
 
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
index 1ab9474..257deb6 100644
--- a/cmake/modules/CMakeLists.txt
+++ b/cmake/modules/CMakeLists.txt
@@ -1,15 +1,22 @@
 set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake")
+set(LLVM_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
 
 get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
 
 configure_file(
-  LLVM.cmake
-  ${llvm_cmake_builddir}/LLVM.cmake
+  LLVMConfig.cmake.in
+  ${llvm_cmake_builddir}/LLVMConfig.cmake
+  @ONLY)
+
+configure_file(
+  LLVMConfigVersion.cmake.in
+  ${llvm_cmake_builddir}/LLVMConfigVersion.cmake
   @ONLY)
 
 install(FILES
-  ${llvm_cmake_builddir}/LLVM.cmake
-  LLVMConfig.cmake
+  ${llvm_cmake_builddir}/LLVMConfig.cmake
+  ${llvm_cmake_builddir}/LLVMConfigVersion.cmake
+  LLVM-Config.cmake
   LLVMLibDeps.cmake
   DESTINATION share/llvm/cmake)
 
@@ -17,16 +24,11 @@ install(DIRECTORY .
   DESTINATION share/llvm/cmake
   FILES_MATCHING PATTERN *.cmake
   PATTERN .svn EXCLUDE
-  PATTERN LLVM.cmake EXCLUDE
   PATTERN LLVMConfig.cmake EXCLUDE
+  PATTERN LLVMConfigVersion.cmake EXCLUDE
+  PATTERN LLVM-Config.cmake EXCLUDE
   PATTERN LLVMLibDeps.cmake EXCLUDE
   PATTERN FindBison.cmake EXCLUDE
   PATTERN GetTargetTriple.cmake EXCLUDE
   PATTERN VersionFromVCS.cmake EXCLUDE
   PATTERN CheckAtomic.cmake EXCLUDE)
-
-install(FILES
-  ${llvm_cmake_builddir}/LLVM.cmake
-  LLVMConfig.cmake
-  LLVMLibDeps.cmake
-  DESTINATION share/llvm/cmake)
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index f62e86a..0633ac9 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -84,7 +84,7 @@ if( LLVM_ENABLE_PIC )
     if( SUPPORTS_FPIC_FLAG )
       message(STATUS "Building with -fPIC")
       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
-      set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
     else( SUPPORTS_FPIC_FLAG )
       message(WARNING "-fPIC not supported.")
     endif()
@@ -102,6 +102,29 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
   endif( LLVM_BUILD_32_BITS )
 endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
 
+if( MSVC_IDE AND ( MSVC90 OR MSVC10 ) )
+  # Only Visual Studio 2008 and 2010 officially supports /MP.
+  # Visual Studio 2005 do support it but it's experimental there.
+  set(LLVM_COMPILER_JOBS "0" CACHE STRING
+    "Number of parallel compiler jobs. 0 means use all processors. Default is 0.")
+  if( NOT LLVM_COMPILER_JOBS STREQUAL "1" )
+    if( LLVM_COMPILER_JOBS STREQUAL "0" )
+      add_llvm_definitions( /MP )
+    else()
+      if (MSVC10)
+        message(FATAL_ERROR
+          "Due to a bug in CMake only 0 and 1 is supported for "
+          "LLVM_COMPILER_JOBS when generating for Visual Studio 2010")
+      else()
+        message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS})
+        add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} )
+      endif()
+    endif()
+  else()
+    message(STATUS "Parallel compilation disabled")
+  endif()
+endif()
+
 if( MSVC )
   include(ChooseMSVCCRT)
 
@@ -130,7 +153,7 @@ if( MSVC )
     -wd4715 # Suppress ''function' : not all control paths return a value'
     -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
     -wd4065 # Suppress 'switch statement contains 'default' but no 'case' labels'
-
+    -wd4181 # Suppress 'qualifier applied to reference type; ignored'
     -w14062 # Promote "enumerator in switch of enum is not handled" to level 1 warning.
     )
 
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVM-Config.cmake
index 349544e..a6286fe 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVM-Config.cmake
@@ -16,6 +16,12 @@ function(get_system_libs return_var)
 endfunction(get_system_libs)
 
 
+function(link_system_libs target)
+  get_system_libs(llvm_system_libs)
+  target_link_libraries(${target} ${llvm_system_libs})
+endfunction(link_system_libs)
+
+
 function(is_llvm_target_library library return_var)
   # Sets variable `return_var' to ON if `library' corresponds to a
   # LLVM supported target. To OFF if it doesn't.
@@ -23,7 +29,8 @@ function(is_llvm_target_library library return_var)
   string(TOUPPER "${library}" capitalized_lib)
   string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
   foreach(t ${targets})
-    if( capitalized_lib STREQUAL "LLVM${t}" OR
+    if( capitalized_lib STREQUAL t OR
+	capitalized_lib STREQUAL "LLVM${t}" OR
 	capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
 	capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
 	capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
@@ -61,6 +68,26 @@ function(explicit_map_components_to_libraries out_libs)
   set( link_components ${ARGN} )
   get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
   string(TOUPPER "${llvm_libs}" capitalized_libs)
+
+  # Expand some keywords:
+  list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend)
+  list(FIND link_components "engine" engine_required)
+  if( NOT engine_required EQUAL -1 )
+    list(FIND LLVM_TARGETS_WITH_JIT "${LLVM_NATIVE_ARCH}" have_jit)
+    if( NOT have_native_backend EQUAL -1 AND NOT have_jit EQUAL -1 )
+      list(APPEND link_components "jit")
+      list(APPEND link_components "native")
+    else()
+      list(APPEND link_components "interpreter")
+    endif()
+  endif()
+  list(FIND link_components "native" native_required)
+  if( NOT native_required EQUAL -1 )
+    if( NOT have_native_backend EQUAL -1 )
+      list(APPEND link_components ${LLVM_NATIVE_ARCH})
+    endif()
+  endif()
+
   # Translate symbolic component names to real libraries:
   foreach(c ${link_components})
     # add codegen, asmprinter, asmparser, disassembler
@@ -94,14 +121,13 @@ function(explicit_map_components_to_libraries out_libs)
         list(APPEND expanded_components "LLVM${c}Disassembler")
       endif()
     elseif( c STREQUAL "native" )
-      list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen")
+      # already processed
     elseif( c STREQUAL "nativecodegen" )
       list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen")
     elseif( c STREQUAL "backend" )
       # same case as in `native'.
     elseif( c STREQUAL "engine" )
-      # TODO: as we assume we are on X86, this is `jit'.
-      list(APPEND expanded_components "LLVMJIT")
+      # already processed
     elseif( c STREQUAL "all" )
       list(APPEND expanded_components ${llvm_libs})
     else( NOT idx LESS 0 )
@@ -109,7 +135,7 @@ function(explicit_map_components_to_libraries out_libs)
       string(TOUPPER "${c}" capitalized)
       list(FIND capitalized_libs LLVM${capitalized} lib_idx)
       if( lib_idx LESS 0 )
-	# The component is unkown. Maybe is an ommitted target?
+	# The component is unknown. Maybe is an omitted target?
 	is_llvm_target_library(${c} iltl_result)
 	if( NOT iltl_result )
 	  message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVMConfig.cmake.in
index 9182afd..5a048b7 100644
--- a/cmake/modules/LLVM.cmake
+++ b/cmake/modules/LLVMConfig.cmake.in
@@ -10,6 +10,8 @@ set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@)
 
 set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
 
+set(LLVM_TARGETS_WITH_JIT @LLVM_TARGETS_WITH_JIT@)
+
 set(TARGET_TRIPLE "@TARGET_TRIPLE@")
 
 set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@)
@@ -20,21 +22,26 @@ set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
 
 set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
 
-set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS)
-
 set(HAVE_LIBDL @HAVE_LIBDL@)
-set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD)
+set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD@)
+set(LLVM_ON_UNIX @LLVM_ON_UNIX@)
+set(LLVM_ON_WIN32 @LLVM_ON_WIN32@)
+
+set(LLVM_INSTALL_PREFIX @LLVM_INSTALL_PREFIX@)
+set(LLVM_INCLUDE_DIRS ${LLVM_INSTALL_PREFIX}/include)
+set(LLVM_LIBRARY_DIRS ${LLVM_INSTALL_PREFIX}/lib)
+set(LLVM_DEFINITIONS "-D__STDC_LIMIT_MACROS" "-D__STDC_CONSTANT_MACROS")
 
 # We try to include using the current setting of CMAKE_MODULE_PATH,
 # which suppossedly was filled by the user with the directory where
 # this file was installed:
-include( LLVMConfig OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED )
+include( LLVM-Config OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED )
 
 # If failed, we assume that this is an un-installed build:
 if( NOT LLVMCONFIG_INCLUDED )
   set(CMAKE_MODULE_PATH
     ${CMAKE_MODULE_PATH}
     "@LLVM_SOURCE_DIR@/cmake/modules")
-  include( LLVMConfig )
+  include( LLVM-Config )
 endif()
 
diff --git a/cmake/modules/LLVMConfigVersion.cmake.in b/cmake/modules/LLVMConfigVersion.cmake.in
new file mode 100644
index 0000000..add5aa9
--- /dev/null
+++ b/cmake/modules/LLVMConfigVersion.cmake.in
@@ -0,0 +1 @@
+set(PACKAGE_VERSION "@PACKAGE_VERSION@")
+\ No newline at end of file
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index afba85e..509ac52 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -30,11 +30,11 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM
 set(MSVC_LIB_DEPS_LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC)
 set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo)
-set(MSVC_LIB_DEPS_LLVMMCJIT LLVMExecutionEngine LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMMCJIT LLVMCore LLVMExecutionEngine LLVMRuntimeDyld LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget)
@@ -47,6 +47,7 @@ set(MSVC_LIB_DEPS_LLVMPTXInfo LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMRuntimeDyld LLVMObject LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMTarget LLVMTransformUtils)
 set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMTransformUtils)
 set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMTarget)
@@ -61,7 +62,7 @@ set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport LLVMX86Utils)
 set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Info LLVMX86Utils)
 set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info)
 set(MSVC_LIB_DEPS_LLVMX86Info LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMX86Utils LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMX86Utils LLVMCore LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
 set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport)
diff --git a/configure b/configure
index 959822f..f1f4dd3 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for llvm 2.9svn.
+# Generated by GNU Autoconf 2.60 for llvm 3.0svn.
 #
 # Report bugs to <llvmbugs@cs.uiuc.edu>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='llvm'
 PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='2.9svn'
-PACKAGE_STRING='llvm 2.9svn'
+PACKAGE_VERSION='3.0svn'
+PACKAGE_STRING='llvm 3.0svn'
 PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
 
 ac_unique_file="lib/VMCore/Module.cpp"
@@ -1328,7 +1328,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures llvm 2.9svn to adapt to many kinds of systems.
+\`configure' configures llvm 3.0svn to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1394,7 +1394,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of llvm 2.9svn:";;
+     short | recursive ) echo "Configuration of llvm 3.0svn:";;
    esac
   cat <<\_ACEOF
 
@@ -1551,7 +1551,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-llvm configure 2.9svn
+llvm configure 3.0svn
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1567,7 +1567,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by llvm $as_me 2.9svn, which was
+It was created by llvm $as_me 3.0svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -5117,6 +5117,9 @@ _ACEOF
     LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
     LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
     LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+    if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+      LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
+    fi
 
 cat >>confdefs.h <<_ACEOF
 #define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET
@@ -5132,6 +5135,13 @@ cat >>confdefs.h <<_ACEOF
 #define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER
 _ACEOF
 
+    if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ASMPARSER $LLVM_NATIVE_ASMPARSER
+_ACEOF
+
+    fi
   fi
 done
 
@@ -11561,7 +11571,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 11564 "configure"
+#line 11574 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11713,7 +11723,7 @@ else
   osf[1234]*)
     # dlopen did load deplibs (at least at 4.x), but until the 5.x series,
     # it did *not* use an RPATH in a shared library to find objects the
-    # library depends on, so we explictly say `no'.
+    # library depends on, so we explicitly say `no'.
     libltdl_cv_sys_dlopen_deplibs=no
     ;;
   osf5.0|osf5.0a|osf5.1)
@@ -20650,6 +20660,73 @@ fi
 
 fi
 
+if test "$llvm_cv_os_type" = "MingW" ; then
+  { echo "$as_me:$LINENO: checking whether EnumerateLoadedModules() accepts new decl" >&5
+echo $ECHO_N "checking whether EnumerateLoadedModules() accepts new decl... $ECHO_C" >&6; }
+  cat >conftest.$ac_ext <<_ACEOF
+#include <windows.h>
+#include <imagehlp.h>
+extern void foo(PENUMLOADED_MODULES_CALLBACK);
+extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+
+  { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+  llvm_cv_win32_elmcb_pcstr="PCSTR"
+
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+  { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+  llvm_cv_win32_elmcb_pcstr="PSTR"
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+cat >>confdefs.h <<_ACEOF
+#define WIN32_ELMCB_PCSTR $llvm_cv_win32_elmcb_pcstr
+_ACEOF
+
+fi
+
 
 { echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5
 echo $ECHO_N "checking for isnan in <math.h>... $ECHO_C" >&6; }
@@ -22942,7 +23019,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by llvm $as_me 2.9svn, which was
+This file was extended by llvm $as_me 3.0svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -22995,7 +23072,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-llvm config.status 2.9svn
+llvm config.status 3.0svn
 configured by $0, generated by GNU Autoconf 2.60,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
index 20b7e96..b93cff0 100644
--- a/docs/AliasAnalysis.html
+++ b/docs/AliasAnalysis.html
@@ -7,9 +7,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   LLVM Alias Analysis Infrastructure
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a></li>
@@ -59,12 +59,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt
 to determine whether or not two pointers ever can point to the same object in
@@ -96,12 +96,12 @@ know</a>.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="overview"><tt>AliasAnalysis</tt> Class Overview</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The <a
 href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a>
@@ -122,14 +122,12 @@ multiple values, values which are not
 <a href="LangRef.html#constants">constants</a> are all defined within the
 same function.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="pointers">Representation of Pointers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Most importantly, the <tt>AliasAnalysis</tt> class provides several methods
 which are used to query whether or not two memory objects alias, whether
@@ -181,11 +179,11 @@ that the accesses alias.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="alias">The <tt>alias</tt> method</a>
-</div>
+</h3>
   
-<div class="doc_text">
+<div>
 <p>The <tt>alias</tt> method is the primary interface used to determine whether
 or not two memory objects alias each other.  It takes two memory objects as
 input and returns MustAlias, PartialAlias, MayAlias, or NoAlias as
@@ -194,14 +192,13 @@ appropriate.</p>
 <p>Like all <tt>AliasAnalysis</tt> interfaces, the <tt>alias</tt> method requires
 that either the two pointer values be defined within the same function, or at
 least one of the values is a <a href="LangRef.html#constants">constant</a>.</p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="MustMayNo">Must, May, and No Alias Responses</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>The NoAlias response may be used when there is never an immediate dependence
 between any memory reference <i>based</i> on one pointer and any memory
 reference <i>based</i> the other. The most obvious example is when the two
@@ -227,12 +224,14 @@ implies that the pointers compare equal.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ModRefInfo">The <tt>getModRefInfo</tt> methods</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>getModRefInfo</tt> methods return information about whether the
 execution of an instruction can read or modify a memory location.  Mod/Ref
@@ -250,25 +249,23 @@ memory written to by CS2.  Note that this relation is not commutative.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="OtherItfs">Other useful <tt>AliasAnalysis</tt> methods</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 Several other tidbits of information are often collected by various alias
 analysis implementations and can be put to good use by various clients.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   The <tt>pointsToConstantMemory</tt> method
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>pointsToConstantMemory</tt> method returns true if and only if the
 analysis can prove that the pointer only points to unchanging memory locations
@@ -279,12 +276,12 @@ memory location to be modified.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="simplemodref">The <tt>doesNotAccessMemory</tt> and
   <tt>onlyReadsMemory</tt> methods</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>These methods are used to provide very simple mod/ref information for
 function calls.  The <tt>doesNotAccessMemory</tt> method returns true for a
@@ -307,13 +304,17 @@ functions that satisfy the <tt>doesNotAccessMemory</tt> method also satisfies
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="writingnew">Writing a new <tt>AliasAnalysis</tt> Implementation</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Writing a new alias analysis implementation for LLVM is quite
 straight-forward.  There are already several implementations that you can use
@@ -321,14 +322,12 @@ for examples, and the following information should help fill in any details.
 For a examples, take a look at the <a href="#impls">various alias analysis
 implementations</a> included with LLVM.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="passsubclasses">Different Pass styles</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The first step to determining what type of <a
 href="WritingAnLLVMPass.html">LLVM pass</a> you need to use for your Alias
@@ -352,11 +351,11 @@ solve:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="requiredcalls">Required initialization calls</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Your subclass of <tt>AliasAnalysis</tt> is required to invoke two methods on
 the <tt>AliasAnalysis</tt> base class: <tt>getAnalysisUsage</tt> and
@@ -393,11 +392,11 @@ bool run(Module &amp;M) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="interfaces">Interfaces which may be specified</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>All of the <a
 href="/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a>
@@ -412,11 +411,11 @@ implementing, you just override the interfaces you can improve.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="chaining"><tt>AliasAnalysis</tt> chaining behavior</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>With only two special exceptions (the <tt><a
 href="#basic-aa">basicaa</a></tt> and <a href="#no-aa"><tt>no-aa</tt></a>
@@ -451,11 +450,11 @@ updated.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="updating">Updating analysis results for transformations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 Alias analysis information is initially computed for a static snapshot of the
 program, but clients will use this information to make transformations to the
@@ -471,12 +470,11 @@ their internal data structures are kept up-to-date as the program changes (for
 example, when an instruction is deleted), and clients of alias analysis must be
 sure to call these interfaces appropriately.
 </p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">The <tt>deleteValue</tt> method</div>
+<h4>The <tt>deleteValue</tt> method</h4>
 
-<div class="doc_text">
+<div>
 The <tt>deleteValue</tt> method is called by transformations when they remove an
 instruction or any other value from the program (including values that do not
 use pointers).  Typically alias analyses keep data structures that have entries
@@ -485,9 +483,9 @@ any entries for the specified value, if they exist.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">The <tt>copyValue</tt> method</div>
+<h4>The <tt>copyValue</tt> method</h4>
 
-<div class="doc_text">
+<div>
 The <tt>copyValue</tt> method is used when a new value is introduced into the
 program.  There is no way to introduce a value into the program that did not
 exist before (this doesn't make sense for a safe compiler transformation), so
@@ -496,9 +494,9 @@ new value has exactly the same properties as the value being copied.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">The <tt>replaceWithNewValue</tt> method</div>
+<h4>The <tt>replaceWithNewValue</tt> method</h4>
 
-<div class="doc_text">
+<div>
 This method is a simple helper method that is provided to make clients easier to
 use.  It is implemented by copying the old analysis information to the new
 value, then deleting the old value.  This method cannot be overridden by alias
@@ -506,9 +504,9 @@ analysis implementations.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">The <tt>addEscapingUse</tt> method</div>
+<h4>The <tt>addEscapingUse</tt> method</h4>
 
-<div class="doc_text">
+<div>
 <p>The <tt>addEscapingUse</tt> method is used when the uses of a pointer
 value have changed in ways that may invalidate precomputed analysis information. 
 Implementations may either use this callback to provide conservative responses
@@ -527,12 +525,14 @@ uses below:</p>
 </ul>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="implefficiency">Efficiency Issues</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>From the LLVM perspective, the only thing you need to do to provide an
 efficient alias analysis is to make sure that alias analysis <b>queries</b> are
@@ -544,11 +544,11 @@ method as possible (within reason).</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="limitations">Limitations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The AliasAnalysis infrastructure has several limitations which make
 writing a new <tt>AliasAnalysis</tt> implementation difficult.</p>
@@ -567,7 +567,7 @@ which are intended to allow a pass to keep an AliasAnalysis consistent,
 however there's no way for a pass to declare in its
 <tt>getAnalysisUsage</tt> that it does so. Some passes attempt to use
 <tt>AU.addPreserved&lt;AliasAnalysis&gt;</tt>, however this doesn't
-actually have any effect.</tt>
+actually have any effect.</p>
 
 <p><tt>AliasAnalysisCounter</tt> (<tt>-count-aa</tt>) and <tt>AliasDebugger</tt>
 (<tt>-debug-aa</tt>) are implemented as <tt>ModulePass</tt> classes, so if your
@@ -616,25 +616,25 @@ from itself.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="using">Using alias analysis results</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>There are several different ways to use alias analysis results.  In order of
 preference, these are...</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="memdep">Using the <tt>MemoryDependenceAnalysis</tt> Pass</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>memdep</tt> pass uses alias analysis to provide high-level dependence
 information about memory-using instructions.  This will tell you which store
@@ -645,11 +645,11 @@ efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ast">Using the <tt>AliasSetTracker</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Many transformations need information about alias <b>sets</b> that are active
 in some scope, rather than information about pairwise aliasing.  The <tt><a
@@ -678,14 +678,12 @@ sunk to outside of the loop, promoting the memory location to a register for the
 duration of the loop nest.  Both of these transformations only apply if the
 pointer argument is loop-invariant.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   The AliasSetTracker implementation
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The AliasSetTracker class is implemented to be as efficient as possible.  It
 uses the union-find algorithm to efficiently merge AliasSets when a pointer is
@@ -706,12 +704,14 @@ are.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="direct">Using the <tt>AliasAnalysis</tt> interface directly</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If neither of these utility class are what your pass needs, you should use
 the interfaces exposed by the <tt>AliasAnalysis</tt> class directly.  Try to use
@@ -721,13 +721,15 @@ best precision and efficiency.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="exist">Existing alias analysis implementations and clients</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you're going to be working with the LLVM alias analysis infrastructure,
 you should know what clients and implementations of alias analysis are
@@ -735,28 +737,24 @@ available.  In particular, if you are implementing an alias analysis, you should
 be aware of the <a href="#aliasanalysis-debug">the clients</a> that are useful
 for monitoring and evaluating different implementations.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="impls">Available <tt>AliasAnalysis</tt> implementations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This section lists the various implementations of the <tt>AliasAnalysis</tt>
 interface.  With the exception of the <a href="#no-aa"><tt>-no-aa</tt></a> and
 <a href="#basic-aa"><tt>-basicaa</tt></a> implementations, all of these <a
 href="#chaining">chain</a> to other alias analysis implementations.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="no-aa">The <tt>-no-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-no-aa</tt> pass is just like what it sounds: an alias analysis that
 never returns any useful information.  This pass can be useful if you think that
@@ -766,11 +764,11 @@ problem.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="basic-aa">The <tt>-basicaa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-basicaa</tt> pass is an aggressive local analysis that "knows"
 many important facts:</p>
@@ -794,11 +792,11 @@ many important facts:</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="globalsmodref">The <tt>-globalsmodref-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>This pass implements a simple context-sensitive mod/ref and alias analysis
 for internal global variables that don't "have their address taken".  If a
@@ -818,11 +816,11 @@ non-address taken globals), but is very quick analysis.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="steens-aa">The <tt>-steens-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-steens-aa</tt> pass implements a variation on the well-known
 "Steensgaard's algorithm" for interprocedural alias analysis.  Steensgaard's
@@ -841,11 +839,11 @@ module, it is not part of the LLVM core.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ds-aa">The <tt>-ds-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-ds-aa</tt> pass implements the full Data Structure Analysis
 algorithm.  Data Structure Analysis is a modular unification-based,
@@ -864,11 +862,11 @@ module, it is not part of the LLVM core.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scev-aa">The <tt>-scev-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-scev-aa</tt> pass implements AliasAnalysis queries by
 translating them into ScalarEvolution queries. This gives it a
@@ -877,22 +875,23 @@ and loop induction variables than other alias analyses have.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="aliasanalysis-xforms">Alias analysis driven transformations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 LLVM includes several alias-analysis driven transformations which can be used
 with any of the implementations above.
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="adce">The <tt>-adce</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-adce</tt> pass, which implements Aggressive Dead Code Elimination
 uses the <tt>AliasAnalysis</tt> interface to delete calls to functions that do
@@ -902,11 +901,11 @@ not have side-effects and are not used.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="licm">The <tt>-licm</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-licm</tt> pass implements various Loop Invariant Code Motion related
 transformations.  It uses the <tt>AliasAnalysis</tt> interface for several
@@ -927,11 +926,11 @@ no may aliases to the loaded/stored memory location.</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="argpromotion">The <tt>-argpromotion</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 The <tt>-argpromotion</tt> pass promotes by-reference arguments to be passed in
 by-value instead.  In particular, if pointer arguments are only loaded from it
@@ -942,38 +941,38 @@ pointer.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="gvn">The <tt>-gvn</tt>, <tt>-memcpyopt</tt>, and <tt>-dse</tt>
      passes</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>These passes use AliasAnalysis information to reason about loads and stores.
 </p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="aliasanalysis-debug">Clients for debugging and evaluation of
   implementations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>These passes are useful for evaluating the various alias analysis
 implementations.  You can use them with commands like '<tt>opt -ds-aa
 -aa-eval foo.bc -disable-output -stats</tt>'.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="print-alias-sets">The <tt>-print-alias-sets</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-print-alias-sets</tt> pass is exposed as part of the
 <tt>opt</tt> tool to print out the Alias Sets formed by the <a
@@ -990,11 +989,11 @@ the <tt>AliasSetTracker</tt> class.  To use it, use something like:</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="count-aa">The <tt>-count-aa</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-count-aa</tt> pass is useful to see how many queries a particular
 pass is making and what responses are returned by the alias analysis.  As an
@@ -1014,11 +1013,11 @@ when debugging a transformation or an alias analysis implementation.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="aa-eval">The <tt>-aa-eval</tt> pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>-aa-eval</tt> pass simply iterates through all pairs of pointers in a
 function and asks an alias analysis whether or not the pointers alias.  This
@@ -1028,13 +1027,17 @@ algorithm will have a lower number of may aliases).</p>
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="memdep">Memory Dependence Analysis</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you're just looking to be a client of alias analysis information, consider
 using the Memory Dependence Analysis interface instead.  MemDep is a lazy, 
@@ -1056,8 +1059,8 @@ analysis directly.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-01-03 22:38:41 +0100 (Mon, 03 Jan 2011) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-21 03:52:00 +0200 (Thu, 21 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index 8d3d382..9a042a0 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -7,7 +7,7 @@
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
 <body>
-<div class="doc_title"> LLVM Bitcode File Format </div>
+<h1> LLVM Bitcode File Format</h1>
 <ol>
   <li><a href="#abstract">Abstract</a></li>
   <li><a href="#overview">Overview</a></li>
@@ -47,10 +47,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="abstract">Abstract</a></div>
+<h2><a name="abstract">Abstract</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document describes the LLVM bitstream file format and the encoding of
 the LLVM IR into it.</p>
@@ -58,10 +58,10 @@ the LLVM IR into it.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="overview">Overview</a></div>
+<h2><a name="overview">Overview</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 What is commonly known as the LLVM bitcode file format (also, sometimes
@@ -88,10 +88,10 @@ wrapper format, then describes the record structure used by LLVM IR files.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="bitstream">Bitstream Format</a></div>
+<h2><a name="bitstream">Bitstream Format</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The bitstream format is literally a stream of bits, with a very simple
@@ -114,13 +114,12 @@ href="CommandGuide/html/llvm-bcanalyzer.html">llvm-bcanalyzer</a> tool can be
 used to dump and inspect arbitrary bitstreams, which is very useful for
 understanding the encoding.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="magic">Magic Numbers</a>
-</div>
+<h3>
+  <a name="magic">Magic Numbers</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The first two bytes of a bitcode file are 'BC' (0x42, 0x43).
 The second two bytes are an application-specific magic number.  Generic
@@ -130,10 +129,11 @@ bitcode, while application-specific programs will want to look at all four.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="primitives">Primitives</a>
-</div>
+<h3>
+  <a name="primitives">Primitives</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 A bitstream literally consists of a stream of bits, which are read in order
@@ -144,13 +144,12 @@ Width Integers</a> or as <a href="#variablewidth">Variable Width
 Integers</a>.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="fixedwidth">Fixed Width Integers</a>
-</div>
+<h4>
+  <a name="fixedwidth">Fixed Width Integers</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Fixed-width integer values have their low bits emitted directly to the file.
    For example, a 3-bit integer value encodes 1 as 001.  Fixed width integers
@@ -161,10 +160,11 @@ Integers</a>.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="variablewidth">Variable Width
-Integers</a></div>
+<h4>
+  <a name="variablewidth">Variable Width Integers</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Variable-width integer (VBR) values encode values of arbitrary size,
 optimizing for the case where the values are small.  Given a 4-bit VBR field,
@@ -182,9 +182,9 @@ value of 24 (011 << 3) with no continuation.  The sum (3+24) yields the value
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="char6">6-bit characters</a></div>
+<h4><a name="char6">6-bit characters</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>6-bit characters encode common characters into a fixed 6-bit field.  They
 represent the following characters with the following 6-bit values:</p>
@@ -206,9 +206,9 @@ characters not in the set.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="wordalign">Word Alignment</a></div>
+<h4><a name="wordalign">Word Alignment</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>Occasionally, it is useful to emit zero bits until the bitstream is a
 multiple of 32 bits.  This ensures that the bit position in the stream can be
@@ -216,12 +216,14 @@ represented as a multiple of 32-bit words.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="abbrevid">Abbreviation IDs</a>
-</div>
+<h3>
+  <a name="abbrevid">Abbreviation IDs</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 A bitstream is a sequential series of <a href="#blocks">Blocks</a> and
@@ -253,10 +255,11 @@ an <a href="#abbrev_records">abbreviated record encoding</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="blocks">Blocks</a>
-</div>
+<h3>
+  <a name="blocks">Blocks</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 Blocks in a bitstream denote nested regions of the stream, and are identified by
@@ -297,13 +300,10 @@ its own set of abbreviations, and its own abbrev id width.  When a sub-block is
 popped, the saved values are restored.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="ENTER_SUBBLOCK">ENTER_SUBBLOCK
-Encoding</a></div>
+<h4><a name="ENTER_SUBBLOCK">ENTER_SUBBLOCK Encoding</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[ENTER_SUBBLOCK, blockid<sub>vbr8</sub>, newabbrevlen<sub>vbr4</sub>,
      &lt;align32bits&gt;, blocklen<sub>32</sub>]</tt></p>
@@ -322,10 +322,9 @@ reader to skip over the entire block in one jump.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="END_BLOCK">END_BLOCK
-Encoding</a></div>
+<h4><a name="END_BLOCK">END_BLOCK Encoding</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[END_BLOCK, &lt;align32bits&gt;]</tt></p>
 
@@ -337,13 +336,14 @@ an even multiple of 32-bits.
 
 </div>
 
-
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="datarecord">Data Records</a>
-</div>
+<h3>
+  <a name="datarecord">Data Records</a>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 Data records consist of a record code and a number of (up to) 64-bit
 integer values.  The interpretation of the code and values is
@@ -355,13 +355,10 @@ which encodes the target triple of a module.  The code is
 ASCII codes for the characters in the string.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="UNABBREV_RECORD">UNABBREV_RECORD
-Encoding</a></div>
+<h4><a name="UNABBREV_RECORD">UNABBREV_RECORD Encoding</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[UNABBREV_RECORD, code<sub>vbr6</sub>, numops<sub>vbr6</sub>,
        op0<sub>vbr6</sub>, op1<sub>vbr6</sub>, ...]</tt></p>
@@ -385,10 +382,9 @@ bits.  This is not an efficient encoding, but it is fully general.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="abbrev_records">Abbreviated Record
-Encoding</a></div>
+<h4><a name="abbrev_records">Abbreviated Record Encoding</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[&lt;abbrevid&gt;, fields...]</tt></p>
 
@@ -409,11 +405,14 @@ operand value).</p>
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="abbreviations">Abbreviations</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="abbreviations">Abbreviations</a>
+</h3>
+
+<div>
 <p>
 Abbreviations are an important form of compression for bitstreams.  The idea is
 to specify a dense encoding for a class of records once, then use that encoding
@@ -431,13 +430,11 @@ As a concrete example, LLVM IR files usually emit an abbreviation
 for binary operators.  If a specific LLVM module contained no or few binary
 operators, the abbreviation does not need to be emitted.
 </p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="DEFINE_ABBREV">DEFINE_ABBREV
- Encoding</a></div>
+<h4><a name="DEFINE_ABBREV">DEFINE_ABBREV  Encoding</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[DEFINE_ABBREV, numabbrevops<sub>vbr5</sub>, abbrevop0, abbrevop1,
  ...]</tt></p>
@@ -552,11 +549,14 @@ used for any other string value.
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="stdblocks">Standard Blocks</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="stdblocks">Standard Blocks</a>
+</h3>
+
+<div>
 
 <p>
 In addition to the basic block structure and record encodings, the bitstream
@@ -565,13 +565,10 @@ stream is to be decoded or other metadata.  In the future, new standard blocks
 may be added.  Block IDs 0-7 are reserved for standard blocks.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="BLOCKINFO">#0 - BLOCKINFO
-Block</a></div>
+<h4><a name="BLOCKINFO">#0 - BLOCKINFO Block</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 The <tt>BLOCKINFO</tt> block allows the description of metadata for other
@@ -620,11 +617,15 @@ from the corresponding blocks.  It is not safe to skip them.
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div>
+<h2><a name="wrapper">Bitcode Wrapper Format</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper
@@ -652,10 +653,10 @@ value that can be used to encode the CPU of the target.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
+<h2><a name="llvmir">LLVM IR Encoding</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 LLVM IR is encoded into a bitstream by defining blocks and records.  It uses
@@ -666,16 +667,17 @@ that the writer uses, as these are fully self-described in the file, and the
 reader is not allowed to build in any knowledge of this.
 </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="basics">Basics</a>
-</div>
+<h3>
+  <a name="basics">Basics</a>
+</h3>
+
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="ir_magic">LLVM IR Magic Number</a></div>
+<h4><a name="ir_magic">LLVM IR Magic Number</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 The magic number for LLVM IR files is:
@@ -695,9 +697,9 @@ When combined with the bitcode magic number and viewed as bytes, this is
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="ir_signed_vbr">Signed VBRs</a></div>
+<h4><a name="ir_signed_vbr">Signed VBRs</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 <a href="#variablewidth">Variable Width Integer</a> encoding is an efficient way to
@@ -728,9 +730,9 @@ within <tt>CONSTANTS_BLOCK</tt> blocks.
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="ir_blocks">LLVM IR Blocks</a></div>
+<h4><a name="ir_blocks">LLVM IR Blocks</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 LLVM IR is defined with the following blocks:
@@ -758,11 +760,14 @@ LLVM IR is defined with the following blocks:
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="MODULE_BLOCK">MODULE_BLOCK Contents</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="MODULE_BLOCK">MODULE_BLOCK Contents</a>
+</h3>
+
+<div>
 
 <p>The <tt>MODULE_BLOCK</tt> block (id 8) is the top-level block for LLVM
 bitcode files, and each bitcode file must contain exactly one. In
@@ -782,13 +787,10 @@ following sub-blocks:
 <li><a href="#METADATA_BLOCK"><tt>METADATA_BLOCK</tt></a></li>
 </ul>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_VERSION">MODULE_CODE_VERSION Record</a>
-</div>
+<h4><a name="MODULE_CODE_VERSION">MODULE_CODE_VERSION Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[VERSION, version#]</tt></p>
 
@@ -798,10 +800,9 @@ time.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_TRIPLE">MODULE_CODE_TRIPLE Record</a>
-</div>
+<h4><a name="MODULE_CODE_TRIPLE">MODULE_CODE_TRIPLE Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[TRIPLE, ...string...]</tt></p>
 
 <p>The <tt>TRIPLE</tt> record (code 2) contains a variable number of
@@ -810,10 +811,9 @@ specification string.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_DATALAYOUT">MODULE_CODE_DATALAYOUT Record</a>
-</div>
+<h4><a name="MODULE_CODE_DATALAYOUT">MODULE_CODE_DATALAYOUT Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[DATALAYOUT, ...string...]</tt></p>
 
 <p>The <tt>DATALAYOUT</tt> record (code 3) contains a variable number of
@@ -822,10 +822,9 @@ specification string.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_ASM">MODULE_CODE_ASM Record</a>
-</div>
+<h4><a name="MODULE_CODE_ASM">MODULE_CODE_ASM Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[ASM, ...string...]</tt></p>
 
 <p>The <tt>ASM</tt> record (code 4) contains a variable number of
@@ -834,10 +833,9 @@ individual assembly blocks separated by newline (ASCII 10) characters.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME Record</a>
-</div>
+<h4><a name="MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[SECTIONNAME, ...string...]</tt></p>
 
 <p>The <tt>SECTIONNAME</tt> record (code 5) contains a variable number
@@ -850,10 +848,9 @@ referenced by the 1-based index in the <i>section</i> fields of
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_DEPLIB">MODULE_CODE_DEPLIB Record</a>
-</div>
+<h4><a name="MODULE_CODE_DEPLIB">MODULE_CODE_DEPLIB Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[DEPLIB, ...string...]</tt></p>
 
 <p>The <tt>DEPLIB</tt> record (code 6) contains a variable number of
@@ -864,10 +861,9 @@ library name referenced.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_GLOBALVAR">MODULE_CODE_GLOBALVAR Record</a>
-</div>
+<h4><a name="MODULE_CODE_GLOBALVAR">MODULE_CODE_GLOBALVAR Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal]</tt></p>
 
 <p>The <tt>GLOBALVAR</tt> record (code 7) marks the declaration or
@@ -923,16 +919,15 @@ encoding of the visibility of this variable:
 is <tt>thread_local</tt></li>
 
 <li><i>unnamed_addr</i>: If present and non-zero, indicates that the variable
-has <tt>unnamed_addr<tt></li>
+has <tt>unnamed_addr</tt></li>
 
 </ul>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_FUNCTION">MODULE_CODE_FUNCTION Record</a>
-</div>
+<h4><a name="MODULE_CODE_FUNCTION">MODULE_CODE_FUNCTION Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc]</tt></p>
 
@@ -980,16 +975,15 @@ index in the table of
 <a href="#MODULE_CODE_GCNAME">MODULE_CODE_GCNAME</a> entries.</li>
 
 <li><i>unnamed_addr</i>: If present and non-zero, indicates that the function
-has <tt>unnamed_addr<tt></li>
+has <tt>unnamed_addr</tt></li>
 
 </ul>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_ALIAS">MODULE_CODE_ALIAS Record</a>
-</div>
+<h4><a name="MODULE_CODE_ALIAS">MODULE_CODE_ALIAS Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[ALIAS, alias type, aliasee val#, linkage, visibility]</tt></p>
 
@@ -1011,10 +1005,9 @@ for this alias</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_PURGEVALS">MODULE_CODE_PURGEVALS Record</a>
-</div>
+<h4><a name="MODULE_CODE_PURGEVALS">MODULE_CODE_PURGEVALS Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[PURGEVALS, numvals]</tt></p>
 
 <p>The <tt>PURGEVALS</tt> record (code 10) resets the module-level
@@ -1025,10 +1018,9 @@ new value indices will start from the given <i>numvals</i> value.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="MODULE_CODE_GCNAME">MODULE_CODE_GCNAME Record</a>
-</div>
+<h4><a name="MODULE_CODE_GCNAME">MODULE_CODE_GCNAME Record</a></h4>
 
-<div class="doc_text">
+<div>
 <p><tt>[GCNAME, ...string...]</tt></p>
 
 <p>The <tt>GCNAME</tt> record (code 11) contains a variable number of
@@ -1039,11 +1031,14 @@ the module. These records can be referenced by 1-based index in the <i>gc</i>
 fields of <tt>FUNCTION</tt> records.</p>
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a>
+</h3>
+
+<div>
 
 <p>The <tt>PARAMATTR_BLOCK</tt> block (id 9) contains a table of
 entries describing the attributes of function parameters. These
@@ -1057,14 +1052,10 @@ href="#FUNC_CODE_INST_CALL"><tt>INST_CALL</tt></a> records.</p>
 that each is unique (i.e., no two indicies represent equivalent
 attribute lists). </p>
 
-</div>
-
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY Record</a>
-</div>
+<h4><a name="PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[ENTRY, paramidx0, attr0, paramidx1, attr1...]</tt></p>
 
@@ -1105,11 +1096,14 @@ the logarithm base 2 of the requested alignment, plus 1</li>
 </ul>
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="TYPE_BLOCK">TYPE_BLOCK Contents</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="TYPE_BLOCK">TYPE_BLOCK Contents</a>
+</h3>
+
+<div>
 
 <p>The <tt>TYPE_BLOCK</tt> block (id 10) contains records which
 constitute a table of type operator entries used to represent types
@@ -1124,13 +1118,10 @@ type operator records.
 each entry is unique (i.e., no two indicies represent structurally
 equivalent types). </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_NUMENTRY">TYPE_CODE_NUMENTRY Record</a>
-</div>
+<h4><a name="TYPE_CODE_NUMENTRY">TYPE_CODE_NUMENTRY Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[NUMENTRY, numentries]</tt></p>
 
@@ -1142,10 +1133,9 @@ in the block.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_VOID">TYPE_CODE_VOID Record</a>
-</div>
+<h4><a name="TYPE_CODE_VOID">TYPE_CODE_VOID Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[VOID]</tt></p>
 
@@ -1155,10 +1145,9 @@ type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_FLOAT">TYPE_CODE_FLOAT Record</a>
-</div>
+<h4><a name="TYPE_CODE_FLOAT">TYPE_CODE_FLOAT Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[FLOAT]</tt></p>
 
@@ -1168,10 +1157,9 @@ floating point) type to the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_DOUBLE">TYPE_CODE_DOUBLE Record</a>
-</div>
+<h4><a name="TYPE_CODE_DOUBLE">TYPE_CODE_DOUBLE Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[DOUBLE]</tt></p>
 
@@ -1181,10 +1169,9 @@ floating point) type to the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_LABEL">TYPE_CODE_LABEL Record</a>
-</div>
+<h4><a name="TYPE_CODE_LABEL">TYPE_CODE_LABEL Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[LABEL]</tt></p>
 
@@ -1194,10 +1181,9 @@ the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_OPAQUE">TYPE_CODE_OPAQUE Record</a>
-</div>
+<h4><a name="TYPE_CODE_OPAQUE">TYPE_CODE_OPAQUE Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[OPAQUE]</tt></p>
 
@@ -1208,10 +1194,9 @@ unified.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_INTEGER">TYPE_CODE_INTEGER  Record</a>
-</div>
+<h4><a name="TYPE_CODE_INTEGER">TYPE_CODE_INTEGER  Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[INTEGER, width]</tt></p>
 
@@ -1222,10 +1207,9 @@ integer type.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_POINTER">TYPE_CODE_POINTER Record</a>
-</div>
+<h4><a name="TYPE_CODE_POINTER">TYPE_CODE_POINTER Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[POINTER, pointee type, address space]</tt></p>
 
@@ -1243,10 +1227,9 @@ default address space is zero.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_FUNCTION">TYPE_CODE_FUNCTION Record</a>
-</div>
+<h4><a name="TYPE_CODE_FUNCTION">TYPE_CODE_FUNCTION Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[FUNCTION, vararg, ignored, retty, ...paramty... ]</tt></p>
 
@@ -1268,10 +1251,9 @@ parameter types of the function</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_STRUCT">TYPE_CODE_STRUCT Record</a>
-</div>
+<h4><a name="TYPE_CODE_STRUCT">TYPE_CODE_STRUCT Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[STRUCT, ispacked, ...eltty...]</tt></p>
 
@@ -1287,10 +1269,9 @@ types of the structure</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_ARRAY">TYPE_CODE_ARRAY Record</a>
-</div>
+<h4><a name="TYPE_CODE_ARRAY">TYPE_CODE_ARRAY Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[ARRAY, numelts, eltty]</tt></p>
 
@@ -1305,10 +1286,9 @@ table.  The operand fields are</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_VECTOR">TYPE_CODE_VECTOR Record</a>
-</div>
+<h4><a name="TYPE_CODE_VECTOR">TYPE_CODE_VECTOR Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[VECTOR, numelts, eltty]</tt></p>
 
@@ -1323,10 +1303,9 @@ table.  The operand fields are</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_X86_FP80">TYPE_CODE_X86_FP80 Record</a>
-</div>
+<h4><a name="TYPE_CODE_X86_FP80">TYPE_CODE_X86_FP80 Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[X86_FP80]</tt></p>
 
@@ -1336,10 +1315,9 @@ floating point) type to the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_FP128">TYPE_CODE_FP128 Record</a>
-</div>
+<h4><a name="TYPE_CODE_FP128">TYPE_CODE_FP128 Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[FP128]</tt></p>
 
@@ -1349,10 +1327,9 @@ floating point) type to the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_PPC_FP128">TYPE_CODE_PPC_FP128 Record</a>
-</div>
+<h4><a name="TYPE_CODE_PPC_FP128">TYPE_CODE_PPC_FP128 Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[PPC_FP128]</tt></p>
 
@@ -1362,10 +1339,9 @@ floating point) type to the type table.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_METADATA">TYPE_CODE_METADATA Record</a>
-</div>
+<h4><a name="TYPE_CODE_METADATA">TYPE_CODE_METADATA Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[METADATA]</tt></p>
 
@@ -1374,11 +1350,14 @@ type to the type table.
 </p>
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a>
+</h3>
+
+<div>
 
 <p>The <tt>CONSTANTS_BLOCK</tt> block (id 11) ...
 </p>
@@ -1387,10 +1366,11 @@ type to the type table.
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="FUNCTION_BLOCK">FUNCTION_BLOCK Contents</a>
-</div>
+<h3>
+  <a name="FUNCTION_BLOCK">FUNCTION_BLOCK Contents</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>FUNCTION_BLOCK</tt> block (id 12) ...
 </p>
@@ -1409,23 +1389,21 @@ type to the type table.
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a>
-</div>
+<h3>
+  <a name="TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TYPE_SYMTAB_BLOCK</tt> block (id 13) contains entries which
 map between module-level named types and their corresponding type
 indices.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TST_CODE_ENTRY">TST_CODE_ENTRY Record</a>
-</div>
+<h4><a name="TST_CODE_ENTRY">TST_CODE_ENTRY Record</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>[ENTRY, typeid, ...string...]</tt></p>
 
@@ -1436,12 +1414,14 @@ name. Each entry corresponds to a single named type.
 </p>
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a>
-</div>
+<h3>
+  <a name="VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>VALUE_SYMTAB_BLOCK</tt> block (id 14) ... 
 </p>
@@ -1450,10 +1430,11 @@ name. Each entry corresponds to a single named type.
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="METADATA_BLOCK">METADATA_BLOCK Contents</a>
-</div>
+<h3>
+  <a name="METADATA_BLOCK">METADATA_BLOCK Contents</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>METADATA_BLOCK</tt> block (id 15) ...
 </p>
@@ -1462,16 +1443,18 @@ name. Each entry corresponds to a single named type.
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a>
-</div>
+<h3>
+  <a name="METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>METADATA_ATTACHMENT</tt> block (id 16) ...
 </p>
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
 <hr>
@@ -1480,8 +1463,8 @@ name. Each entry corresponds to a single named type.
 <a href="http://validator.w3.org/check/referer"><img
  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2011-01-08 17:42:36 +0100 (Sat, 08 Jan 2011) $
+<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html
index cbd71aa..05c867b 100644
--- a/docs/Bugpoint.html
+++ b/docs/Bugpoint.html
@@ -6,9 +6,9 @@
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
 
-<div class="doc_title">
+<h1>
   LLVM bugpoint tool: design and usage
-</div>
+</h1>
 
 <ul>
   <li><a href="#desc">Description</a></li>
@@ -27,12 +27,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="desc">Description</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><tt>bugpoint</tt> narrows down the source of problems in LLVM tools and
 passes.  It can be used to debug three types of failures: optimizer crashes,
@@ -50,12 +50,12 @@ href="HowToSubmitABug.html">How To Submit a Bug Report document</a>.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="design">Design Philosophy</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><tt>bugpoint</tt> is designed to be a useful tool without requiring any
 hooks into the LLVM infrastructure at all.  It works with any and all LLVM
@@ -68,14 +68,12 @@ is still worth it. Note that <tt>bugpoint</tt> is generally very quick unless
 debugging a miscompilation where each test of the program (which requires 
 executing it) takes a long time.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="autoselect">Automatic Debugger Selection</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>bugpoint</tt> reads each <tt>.bc</tt> or <tt>.ll</tt> file specified on
 the command line and links them together into a single module, called the test
@@ -104,11 +102,11 @@ Otherwise, there is no problem <tt>bugpoint</tt> can debug.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="crashdebug">Crash debugger</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If an optimizer or code generator crashes, <tt>bugpoint</tt> will try as hard
 as it can to reduce the list of passes (for optimizer crashes) and the size of
@@ -129,11 +127,11 @@ reproduce the failure with <tt>opt</tt> or <tt>llc</tt>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="codegendebug">Code generator debugger</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The code generator debugger attempts to narrow down the amount of code that
 is being miscompiled by the selected code generator.  To do this, it takes the
@@ -150,11 +148,11 @@ good code.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="miscompilationdebug">Miscompilation debugger</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The miscompilation debugger works similarly to the code generator debugger.
 It works by splitting the test program into two pieces, running the
@@ -166,13 +164,15 @@ assumes that the selected code generator is working properly.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="advice">Advice for using bugpoint</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <tt>bugpoint</tt> can be a remarkably useful tool, but it sometimes works in
 non-obvious ways.  Here are some hints and tips:<p>
@@ -242,8 +242,8 @@ non-obvious ways.  Here are some hints and tips:<p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/CFEBuildInstrs.html b/docs/CFEBuildInstrs.html
index ed2f295..ab10844 100644
--- a/docs/CFEBuildInstrs.html
+++ b/docs/CFEBuildInstrs.html
@@ -21,7 +21,7 @@ This page has moved <a href="GCCFEBuildInstrs.html">here</A>.
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
   Last modified: $Date: 2008-02-13 17:46:10 +0100 (Wed, 13 Feb 2008) $
 </address>
 
diff --git a/docs/CMake.html b/docs/CMake.html
index e303d13..0d8cf62 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -6,9 +6,9 @@
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
 
-<div class="doc_title">
+<h1>
   Building LLVM with CMake
-</div>
+</h1>
 
 <ul>
   <li><a href="#intro">Introduction</a></li>
@@ -22,6 +22,9 @@
   <li><a href="#testing">Executing the test suite</a>
   <li><a href="#cross">Cross compiling</a>
   <li><a href="#embedding">Embedding LLVM in your project</a>
+    <ul>
+    <li><a href="#passdev">Developing LLVM pass out of source</a></li>
+  </ul></li>
   <li><a href="#specifics">Compiler/Platform specific topics</a>
     <ul>
     <li><a href="#msvc">Microsoft Visual C++</a></li>
@@ -33,12 +36,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="intro">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p><a href="http://www.cmake.org/">CMake</a> is a cross-platform
     build-generator tool. CMake does not build the project, it generates
@@ -56,12 +59,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="quickstart">Quick start</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p> We use here the command-line, non-interactive CMake interface </p>
 
@@ -109,12 +112,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="usage">Basic CMake usage</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p>This section explains basic aspects of CMake, mostly for
     explaining those options which you may need on your day-to-day
@@ -157,12 +160,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="options">Options and variables</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p>Variables customize how the build will be generated. Options are
     boolean variables, with possible values ON/OFF. Options and
@@ -191,14 +194,12 @@
     <p><tt>cmake -DVARIABLE:TYPE=value path/to/llvm/source</tt></p>
   </div>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="freccmake">Frequently-used CMake variables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Here are listed some of the CMake variables that are used often,
   along with a brief explanation and LLVM-specific notes. For full
@@ -237,11 +238,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="llvmvars">LLVM-specific variables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt><b>LLVM_TARGETS_TO_BUILD</b>:STRING</dt>
@@ -342,7 +343,7 @@
   <dt><b>LLVM_LIT_TOOLS_DIR</b>:STRING</dt>
   <dd>The path to GnuWin32 tools for tests. Valid on Windows host.
     Defaults to "", then Lit seeks tools according to %PATH%.
-    Lit can find tools(eg. grep, sort, &c) on LLVM_LIT_TOOLS_DIR at first,
+    Lit can find tools(eg. grep, sort, &amp;c) on LLVM_LIT_TOOLS_DIR at first,
     without specifying GnuWin32 to %PATH%.</dd>
 
   <dt><b>LLVM_ENABLE_FFI</b>:BOOL</dt>
@@ -354,13 +355,15 @@
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="testing">Executing the test suite</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Testing is performed when the <i>check</i> target is built. For
   instance, if you are using makefiles, execute this command while on
@@ -375,12 +378,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="cross">Cross compiling</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>See <a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling">this
     wiki page</a> for generic instructions on how to cross-compile
@@ -396,12 +399,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="embedding">Embedding LLVM in your project</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p>The most difficult part of adding LLVM to the build of a project
     is to determine the set of LLVM libraries corresponding to the set
@@ -418,7 +421,7 @@
     endif()
     <b># We incorporate the CMake features provided by LLVM:</b>
     set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake")
-    include(LLVM)
+    include(LLVMConfig)
     <b># Now set the header and library paths:</b>
     include_directories( ${LLVM_ROOT}/include )
     link_directories( ${LLVM_ROOT}/lib )
@@ -436,20 +439,111 @@
     headers on the LLVM source directory (if we are building
     out-of-source.)</p>
 
-</div>
+  <p>Alternativaly, you can utilize CMake's <i>find_package</i>
+    functionality. Here is an equivalent variant of snippet shown above:</p>
+
+  <div class="doc_code">
+    <pre>
+    find_package(LLVM)
+
+    if( NOT LLVM_FOUND )
+      message(FATAL_ERROR "LLVM package can't be found. Set CMAKE_PREFIX_PATH variable to LLVM's installation prefix.")
+    endif()
+
+    include_directories( ${LLVM_INCLUDE_DIRS} )
+    link_directories( ${LLVM_LIBRARY_DIRS} )
+
+    llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native)
+
+    target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES})
+    </pre>
+  </div>
+
+<!-- ======================================================================= -->
+<h3>
+  <a name="passdev">Developing LLVM pass out of source</a>
+</h3>
+
+<div>
 
+  <p>It is possible to develop LLVM passes against installed LLVM.
+     An example of project layout provided below:</p>
+
+  <div class="doc_code">
+    <pre>
+      &lt;project dir&gt;/
+          |
+          CMakeLists.txt
+          &lt;pass name&gt;/
+              |
+              CMakeLists.txt
+              Pass.cpp
+              ...
+    </pre>
+  </div>
+
+  <p>Contents of &lt;project dir&gt;/CMakeLists.txt:</p>
+
+  <div class="doc_code">
+    <pre>
+    find_package(LLVM)
+
+    <b># Define add_llvm_* macro's.</b>
+    include(AddLLVM)
+
+    add_definitions(${LLVM_DEFINITIONS})
+    include_directories(${LLVM_INCLUDE_DIRS})
+    link_directories(${LLVM_LIBRARY_DIRS})
+
+    add_subdirectory(&lt;pass name&gt;)
+    </pre>
+  </div>
+
+  <p>Contents of &lt;project dir&gt;/&lt;pass name&gt;/CMakeLists.txt:</p>
+
+  <div class="doc_code">
+    <pre>
+    add_llvm_loadable_module(LLVMPassname
+      Pass.cpp
+      )
+    </pre>
+  </div>
+
+  <p>When you are done developing your pass, you may wish to integrate it
+     into LLVM source tree. You can achieve it in two easy steps:<br>
+     1. Copying &lt;pass name&gt; folder into &lt;LLVM root&gt;/lib/Transform directory.<br>
+     2. Adding "add_subdirectory(&lt;pass name&gt;)" line into &lt;LLVM root&gt;/lib/Transform/CMakeLists.txt</p>
+</div>
 <!-- *********************************************************************** -->
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="specifics">Compiler/Platform specific topics</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Notes for specific compilers and/or platforms.</p>
 
+<h3>
+  <a name="msvc">Microsoft Visual C++</a>
+</h3>
+
+<div>
+
+<dl>
+  <dt><b>LLVM_COMPILER_JOBS</b>:STRING</dt>
+  <dd>Specifies the maximum number of parallell compiler jobs to use
+    per project when building with msbuild or Visual Studio. Only supported for
+    Visual Studio 2008 and Visual Studio 2010 CMake generators. 0 means use all
+    processors. Default is 0.</dd>
+</dl>
+
+</div>
+
 </div>
 
 <!-- *********************************************************************** -->
@@ -462,7 +556,7 @@
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:ofv@wanadoo.es">Oscar Fuentes</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
   Last modified: $Date: 2010-08-09 03:59:36 +0100 (Mon, 9 Aug 2010) $
 </address>
 
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 925156f..d082acc 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -19,9 +19,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   The LLVM Target-Independent Code Generator
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a>
@@ -127,12 +127,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM target-independent code generator is a framework that provides a
    suite of reusable components for translating the LLVM internal representation
@@ -188,14 +188,12 @@
    depend on the target-description and machine code representation classes,
    ensuring that it is portable.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
  <a name="required">Required components in the code generator</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The two pieces of the LLVM code generator are the high-level interface to the
    code generator and the set of reusable components that can be used to build
@@ -223,11 +221,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
  <a name="high-level-design">The high-level design of the code generator</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM target-independent code generator is designed to support efficient
    and quality code generation for standard register-based microprocessors.
@@ -297,11 +295,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
  <a name="tablegen">Using TableGen for target description</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The target description classes require a detailed description of the target
    architecture.  These target descriptions often have a large amount of common
@@ -324,13 +322,15 @@
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="targetdesc">Target description classes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM target description classes (located in the
    <tt>include/llvm/Target</tt> directory) provide an abstract description of
@@ -346,14 +346,12 @@
    <tt><a href="#targetmachine">TargetMachine</a></tt> class provides accessors
    that should be implemented by the target.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetmachine">The <tt>TargetMachine</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetMachine</tt> class provides virtual methods that are used to
    access the target-specific implementations of the various target description
@@ -369,11 +367,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetdata">The <tt>TargetData</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetData</tt> class is the only required target description class,
    and it is the only class that is not extensible (you cannot derived a new
@@ -385,11 +383,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetlowering">The <tt>TargetLowering</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetLowering</tt> class is used by SelectionDAG based instruction
    selectors primarily to describe how LLVM code should be lowered to
@@ -411,11 +409,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetregisterinfo">The <tt>TargetRegisterInfo</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetRegisterInfo</tt> class is used to describe the register file
    of the target and any interactions between the registers.</p>
@@ -445,11 +443,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetinstrinfo">The <tt>TargetInstrInfo</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetInstrInfo</tt> class is used to describe the machine
    instructions supported by the target. It is essentially an array of
@@ -463,11 +461,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetframeinfo">The <tt>TargetFrameInfo</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetFrameInfo</tt> class is used to provide information about the
    stack frame layout of the target. It holds the direction of stack growth, the
@@ -479,11 +477,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetsubtarget">The <tt>TargetSubtarget</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetSubtarget</tt> class is used to provide information about the
    specific chip set being targeted.  A sub-target informs code generation of
@@ -495,11 +493,11 @@
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetjitinfo">The <tt>TargetJITInfo</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>TargetJITInfo</tt> class exposes an abstract interface used by the
    Just-In-Time code generator to perform target-specific activities, such as
@@ -509,13 +507,15 @@
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="codegendesc">Machine code description classes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>At the high-level, LLVM code is translated to a machine specific
    representation formed out of
@@ -528,14 +528,12 @@
    SSA representation for machine code, as well as a register allocated, non-SSA
    form.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="machineinstr">The <tt>MachineInstr</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Target machine instructions are represented as instances of the
    <tt>MachineInstr</tt> class.  This class is an extremely abstract way of
@@ -576,14 +574,12 @@
 <p>Also if the first operand is a def, it is easier to <a href="#buildmi">create
    instructions</a> whose only def is the first operand.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="buildmi">Using the <tt>MachineInstrBuilder.h</tt> functions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Machine instructions are created by using the <tt>BuildMI</tt> functions,
    located in the <tt>include/llvm/CodeGen/MachineInstrBuilder.h</tt> file.  The
@@ -630,11 +626,11 @@ MI.addReg(Reg, RegState::Define);
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="fixedregs">Fixed (preassigned) registers</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>One important issue that the code generator needs to be aware of is the
    presence of fixed registers.  In particular, there are often places in the
@@ -702,11 +698,11 @@ ret
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ssa">Machine code in SSA form</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>MachineInstr</tt>'s are initially selected in SSA-form, and are
    maintained in SSA-form until register allocation happens.  For the most part,
@@ -719,12 +715,14 @@ ret
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="machinebasicblock">The <tt>MachineBasicBlock</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>MachineBasicBlock</tt> class contains a list of machine instructions
    (<tt><a href="#machineinstr">MachineInstr</a></tt> instances).  It roughly
@@ -737,11 +735,11 @@ ret
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="machinefunction">The <tt>MachineFunction</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>MachineFunction</tt> class contains a list of machine basic blocks
    (<tt><a href="#machinebasicblock">MachineBasicBlock</a></tt> instances).  It
@@ -754,14 +752,15 @@ ret
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="mc">The "MC" Layer</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The MC Layer is used to represent and process code at the raw machine code
@@ -770,7 +769,7 @@ level, devoid of "high level" information like "constant pools", "jump tables",
 like label names, machine instructions, and sections in the object file.  The
 code in this layer is used for a number of important purposes: the tail end of
 the code generator uses it to write a .s or .o file, and it is also used by the
-llvm-mc tool to implement standalone machine codeassemblers and disassemblers.
+llvm-mc tool to implement standalone machine code assemblers and disassemblers.
 </p>
 
 <p>
@@ -779,15 +778,12 @@ of important subsystems that interact at this layer, they are described later
 in this manual.
 </p>
 
-</div>
-
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mcstreamer">The <tt>MCStreamer</tt> API</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 MCStreamer is best thought of as an assembler API.  It is an abstract API which
@@ -817,11 +813,11 @@ MCObjectStreamer implements a full assembler.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mccontext">The <tt>MCContext</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The MCContext class is the owner of a variety of uniqued data structures at the
@@ -832,11 +828,11 @@ interact with to create symbols and sections.  This class can not be subclassed.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mcsymbol">The <tt>MCSymbol</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The MCSymbol class represents a symbol (aka label) in the assembly file.  There
@@ -864,11 +860,11 @@ like this to the .s file:<p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mcsection">The <tt>MCSection</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The MCSection class represents an object-file specific section. It is subclassed
@@ -882,11 +878,11 @@ directive in a .s file).
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mcinst">The <tt>MCInst</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The MCInst class is a target-independent representation of an instruction.  It
@@ -904,27 +900,26 @@ printer, and the type generated by the assembly parser and disassembler.
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="codegenalgs">Target-independent code generation algorithms</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section documents the phases described in the
    <a href="#high-level-design">high-level design of the code generator</a>.
    It explains how they work and some of the rationale behind their design.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="instselect">Instruction Selection</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Instruction Selection is the process of translating LLVM code presented to
    the code generator into target-specific machine instructions.  There are
@@ -936,14 +931,12 @@ printer, and the type generated by the assembly parser and disassembler.
    selector to be generated from these <tt>.td</tt> files, though currently
    there are still things that require custom C++ code.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_intro">Introduction to SelectionDAGs</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The SelectionDAG provides an abstraction for code representation in a way
    that is amenable to instruction selection using automatic techniques
@@ -1001,11 +994,11 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_process">SelectionDAG Instruction Selection Process</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>SelectionDAG-based instruction selection consists of the following steps:</p>
 
@@ -1082,11 +1075,11 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_build">Initial SelectionDAG Construction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The initial SelectionDAG is na&iuml;vely peephole expanded from the LLVM
    input by the <tt>SelectionDAGLowering</tt> class in the
@@ -1102,11 +1095,11 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_legalize_types">SelectionDAG LegalizeTypes Phase</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The Legalize phase is in charge of converting a DAG to only use the types
    that are natively supported by the target.</p>
@@ -1135,11 +1128,11 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_legalize">SelectionDAG Legalize Phase</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The Legalize phase is in charge of converting a DAG to only use the
    operations that are natively supported by the target.</p>
@@ -1167,12 +1160,13 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="selectiondag_optimize">SelectionDAG Optimization Phase: the DAG
-  Combiner</a>
-</div>
+<h4>
+  <a name="selectiondag_optimize">
+    SelectionDAG Optimization Phase: the DAG Combiner
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The SelectionDAG optimization phase is run multiple times for code
    generation, immediately after the DAG is built and once after each
@@ -1202,11 +1196,11 @@ printer, and the type generated by the assembly parser and disassembler.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_select">SelectionDAG Select Phase</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The Select phase is the bulk of the target-specific code for instruction
    selection.  This phase takes a legal SelectionDAG as input, pattern matches
@@ -1363,11 +1357,11 @@ def : Pat&lt;(i32 imm:$imm),
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_sched">SelectionDAG Scheduling and Formation Phase</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The scheduling phase takes the DAG of target instructions from the selection
    phase and assigns an order.  The scheduler can pick an order depending on
@@ -1384,11 +1378,11 @@ def : Pat&lt;(i32 imm:$imm),
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="selectiondag_future">Future directions for the SelectionDAG</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ol>
   <li>Optional function-at-a-time selection.</li>
@@ -1398,18 +1392,20 @@ def : Pat&lt;(i32 imm:$imm),
 
 </div>
  
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ssamco">SSA-based Machine Code Optimizations</a>
-</div>
-<div class="doc_text"><p>To Be Written</p></div>
+</h3>
+<div><p>To Be Written</p></div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="liveintervals">Live Intervals</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Live Intervals are the ranges (intervals) where a variable is <i>live</i>.
    They are used by some <a href="#regalloc">register allocator</a> passes to
@@ -1417,14 +1413,12 @@ def : Pat&lt;(i32 imm:$imm),
    register are live at the same point in the program (i.e., they conflict).
    When this situation occurs, one virtual register must be <i>spilled</i>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="livevariable_analysis">Live Variable Analysis</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The first step in determining the live intervals of variables is to calculate
    the set of registers that are immediately dead after the instruction (i.e.,
@@ -1466,11 +1460,11 @@ def : Pat&lt;(i32 imm:$imm),
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="liveintervals_analysis">Live Intervals Analysis</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>We now have the information available to perform the live intervals analysis
    and build the live intervals themselves.  We start off by numbering the basic
@@ -1485,12 +1479,14 @@ def : Pat&lt;(i32 imm:$imm),
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="regalloc">Register Allocation</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <i>Register Allocation problem</i> consists in mapping a program
    <i>P<sub>v</sub></i>, that can use an unbounded number of virtual registers,
@@ -1500,15 +1496,13 @@ def : Pat&lt;(i32 imm:$imm),
    accommodate all the virtual registers, some of them will have to be mapped
    into memory. These virtuals are called <i>spilled virtuals</i>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_represent">How registers are represented in LLVM</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>In LLVM, physical registers are denoted by integer numbers that normally
    range from 1 to 1023. To see how this numbering is defined for a particular
@@ -1617,11 +1611,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
 
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_howTo">Mapping virtual registers to physical registers</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>There are two ways to map virtual registers to physical registers (or to
    memory slots). The first way, that we will call <i>direct mapping</i>, is
@@ -1667,11 +1661,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_twoAddr">Handling two address instructions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>With very rare exceptions (e.g., function calls), the LLVM machine code
    instructions are three address instructions. That is, each instruction is
@@ -1703,11 +1697,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_ssaDecon">The SSA deconstruction phase</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>An important transformation that happens during register allocation is called
    the <i>SSA Deconstruction Phase</i>. The SSA form simplifies many analyses
@@ -1727,11 +1721,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_fold">Instruction folding</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><i>Instruction folding</i> is an optimization performed during register
    allocation that removes unnecessary copy instructions. For instance, a
@@ -1764,11 +1758,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
 
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_subsubsection">
+<h4>
   <a name="regAlloc_builtIn">Built in register allocators</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM infrastructure provides the application developer with three
    different register allocators:</p>
@@ -1805,23 +1799,25 @@ $ llc -regalloc=pbqp file.bc -o pbqp.s;
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="proepicode">Prolog/Epilog Code Insertion</a>
-</div>
-<div class="doc_text"><p>To Be Written</p></div>
+</h3>
+<div><p>To Be Written</p></div>
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="latemco">Late Machine Code Optimizations</a>
-</div>
-<div class="doc_text"><p>To Be Written</p></div>
+</h3>
+<div><p>To Be Written</p></div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="codeemit">Code Emission</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The code emission step of code generation is responsible for lowering from
 the code generator abstractions (like <a 
@@ -1880,14 +1876,15 @@ to implement an assembler for your target.</p>
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="nativeassembler">Implementing a Native Assembler</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Though you're probably reading this because you want to write or maintain a
 compiler backend, LLVM also fully supports building a native assemblers too.
@@ -1896,20 +1893,18 @@ We've tried hard to automate the generation of the assembler from the .td files
 part of the manual and repetitive data entry can be factored and shared with the
 compiler.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection" id="na_instparsing">Instruction Parsing</div>
+<h3 id="na_instparsing">Instruction Parsing</h3>
 
-<div class="doc_text"><p>To Be Written</p></div>
+<div><p>To Be Written</p></div>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection" id="na_instaliases">
+<h3 id="na_instaliases">
   Instruction Alias Processing
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>Once the instruction is parsed, it enters the MatchInstructionImpl function.
 The MatchInstructionImpl function performs alias processing and then does
 actual matching.</p>
@@ -1922,12 +1917,10 @@ complex/powerful).  Generally you want to use the first alias mechanism that
 meets the needs of your instruction, because it will allow a more concise
 description.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Mnemonic Aliases</div>
+<h4>Mnemonic Aliases</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The first phase of alias processing is simple instruction mnemonic
 remapping for classes of instructions which are allowed with two different
@@ -1965,9 +1958,9 @@ on the current instruction set.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Instruction Aliases</div>
+<h4>Instruction Aliases</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The most general phase of alias processing occurs while matching is
 happening: it provides new forms for the matcher to match along with a specific
@@ -2028,36 +2021,33 @@ subtarget specific.</p>
 
 </div>
 
-
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection" id="na_matching">Instruction Matching</div>
-
-<div class="doc_text"><p>To Be Written</p></div>
-
+<h3 id="na_matching">Instruction Matching</h3>
 
+<div><p>To Be Written</p></div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="targetimpls">Target-specific Implementation Notes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section of the document explains features or design decisions that are
    specific to the code generator for a particular target.  First we start
    with a table that summarizes what features are supported by each target.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetfeatures">Target Feature Matrix</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Note that this table does not include the C backend or Cpp backends, since
 they do not use the target independent code generator infrastructure.  It also
@@ -2228,12 +2218,10 @@ is the key:</p>
 
 </table>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_reliable">Is Generally Reliable</div>
+<h4 id="feat_reliable">Is Generally Reliable</h4>
 
-<div class="doc_text">
+<div>
 <p>This box indicates whether the target is considered to be production quality.
 This indicates that the target has been used as a static compiler to
 compile large amounts of code by a variety of different people and is in
@@ -2241,9 +2229,9 @@ continuous use.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_asmparser">Assembly Parser</div>
+<h4 id="feat_asmparser">Assembly Parser</h4>
 
-<div class="doc_text">
+<div>
 <p>This box indicates whether the target supports parsing target specific .s
 files by implementing the MCAsmParser interface.  This is required for llvm-mc
 to be able to act as a native assembler and is required for inline assembly
@@ -2253,18 +2241,18 @@ support in the native .o file writer.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_disassembler">Disassembler</div>
+<h4 id="feat_disassembler">Disassembler</h4>
 
-<div class="doc_text">
+<div>
 <p>This box indicates whether the target supports the MCDisassembler API for
 disassembling machine opcode bytes into MCInst's.</p>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_inlineasm">Inline Asm</div>
+<h4 id="feat_inlineasm">Inline Asm</h4>
 
-<div class="doc_text">
+<div>
 <p>This box indicates whether the target supports most popular inline assembly
 constraints and modifiers.</p>
 
@@ -2274,9 +2262,9 @@ constraints relating to the X86 floating point stack.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_jit">JIT Support</div>
+<h4 id="feat_jit">JIT Support</h4>
 
-<div class="doc_text">
+<div>
 <p>This box indicates whether the target supports the JIT compiler through
 the ExecutionEngine interface.</p>
 
@@ -2286,9 +2274,9 @@ in ARM codegen mode, but lacks NEON and full Thumb support.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_objectwrite">.o File Writing</div>
+<h4 id="feat_objectwrite">.o File Writing</h4>
 
-<div class="doc_text">
+<div>
 
 <p>This box indicates whether the target supports writing .o files (e.g. MachO,
 ELF, and/or COFF) files directly from the target.  Note that the target also
@@ -2302,9 +2290,9 @@ file to a .o file (as is the case for many C compilers).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection" id="feat_tailcall">Tail Calls</div>
+<h4 id="feat_tailcall">Tail Calls</h4>
 
-<div class="doc_text">
+<div>
 
 <p>This box indicates whether the target supports guaranteed tail calls.  These
 are calls marked "<a href="LangRef.html#i_call">tail</a>" and use the fastcc
@@ -2313,15 +2301,14 @@ more more details</a>.</p>
 
 </div>
 
-
-
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="tailcallopt">Tail call optimization</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Tail call optimization, callee reusing the stack of the caller, is currently
    supported on x86/x86-64 and PowerPC. It is performed if:</p>
@@ -2383,11 +2370,11 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
 
 </div>
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="sibcallopt">Sibling call optimization</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Sibling call optimization is a restricted form of tail call optimization.
    Unlike tail call optimization described in the previous section, it can be
@@ -2427,24 +2414,22 @@ entry:
 
 </div>
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="x86">The X86 backend</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The X86 code generator lives in the <tt>lib/Target/X86</tt> directory.  This
    code generator is capable of targeting a variety of x86-32 and x86-64
    processors, and includes support for ISA extensions such as MMX and SSE.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="x86_tt">X86 Target Triples supported</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The following are the known target triples that are supported by the X86
    backend.  This is not an exhaustive list, and it would be useful to add those
@@ -2469,12 +2454,12 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="x86_cc">X86 Calling Conventions supported</a>
-</div>
+</h4>
 
 
-<div class="doc_text">
+<div>
 
 <p>The following target-specific calling conventions are known to backend:</p>
 
@@ -2489,11 +2474,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="x86_memory">Representing X86 addressing modes in MachineInstrs</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The x86 has a very flexible way of accessing memory.  It is capable of
    forming memory addresses of the following expression directly in integer
@@ -2526,13 +2511,13 @@ OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm  PhysReg
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="x86_memory">X86 address spaces supported</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
-<p>x86 has an experimental feature which provides
+<p>x86 has a feature which provides
    the ability to perform loads and stores to different address spaces
    via the x86 segment registers.  A segment override prefix byte on an
    instruction causes the instruction's memory access to go to the specified
@@ -2571,11 +2556,11 @@ OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm  PhysReg
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="x86_names">Instruction naming</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>An instruction name consists of the base name, a default operand size, and a
    a character per operand with an optional special size. For example:</p>
@@ -2591,25 +2576,25 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ppc">The PowerPC backend</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The PowerPC code generator lives in the lib/Target/PowerPC directory.  The
    code generation is retargetable to several variations or <i>subtargets</i> of
    the PowerPC ISA; including ppc32, ppc64 and altivec.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ppc_abi">LLVM PowerPC ABI</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM follows the AIX PowerPC ABI, with two deviations. LLVM uses a PC
    relative (PIC) or static addressing for accessing global values, so no TOC
@@ -2625,11 +2610,11 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ppc_frame">Frame Layout</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The size of a PowerPC frame is usually fixed for the duration of a
    function's invocation.  Since the frame is fixed size, all references
@@ -2772,11 +2757,11 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ppc_prolog">Prolog/Epilog</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The llvm prolog and epilog are the same as described in the PowerPC ABI, with
    the following exceptions.  Callee saved registers are spilled after the frame
@@ -2789,16 +2774,19 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ppc_dynamic">Dynamic Allocation</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><i>TODO - More to come.</i></p>
 
 </div>
 
+</div>
+
+</div>
 
 <!-- *********************************************************************** -->
 <hr>
@@ -2809,8 +2797,8 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-01-09 00:10:59 +0100 (Sun, 09 Jan 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index 4a9ab7d..139bbdb 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -7,9 +7,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   LLVM Coding Standards
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a></li>
@@ -83,12 +83,12 @@
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document attempts to describe a few coding standards that are being used
 in the LLVM source tree.  Although no coding standards should be regarded as
@@ -117,22 +117,26 @@ href="mailto:sabre@nondot.org">Chris</a>.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="mechanicalissues">Mechanical Source Issues</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
+<div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="sourceformating">Source Code Formatting</a>
-</div>
+</h3>
+
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_commenting">Commenting</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Comments are one critical part of readability and maintainability.  Everyone
 knows they should comment, so should you.  When writing comments, write them as
@@ -141,7 +145,9 @@ etc.  Although we all should probably
 comment our code more than we do, there are a few very critical places that
 documentation is very useful:</p>
 
-<b>File Headers</b>
+<h5>File Headers</h5>
+
+<div>
 
 <p>Every source file should have a header on it that describes the basic 
 purpose of the file.  If a file does not have a header, it should not be 
@@ -184,7 +190,9 @@ Here it's only two lines.  If an algorithm is being implemented or something
 tricky is going on, a reference to the paper where it is published should be
 included, as well as any notes or "gotchas" in the code to watch out for.</p>
 
-<b>Class overviews</b>
+</div>
+
+<h5>Class overviews</h5>
 
 <p>Classes are one fundamental part of a good object oriented design.  As such,
 a class definition should have a comment block that explains what the class is
@@ -193,7 +201,9 @@ could figure it out, it's probably safe to leave it out.  Naming classes
 something sane goes a long ways towards avoiding writing documentation.</p>
 
 
-<b>Method information</b>
+<h5>Method information</h5>
+
+<div>
 
 <p>Methods defined in a class (as well as any global functions) should also be
 documented properly.  A quick note about what it does and a description of the
@@ -207,12 +217,14 @@ happens: does the method return null?  Abort?  Format your hard disk?</p>
 
 </div>
 
+</div>
+
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_commentformat">Comment Formatting</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>In general, prefer C++ style (<tt>//</tt>) comments.  They take less space,
 require less typing, don't have nesting problems, etc.  There are a few cases
@@ -233,11 +245,11 @@ These nest properly and are better behaved in general than C style comments.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_includes"><tt>#include</tt> Style</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Immediately after the <a href="#scf_commenting">header file comment</a> (and
 include guards if working on a header file), the <a
@@ -273,11 +285,11 @@ implements are defined.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_codewidth">Source Code Width</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Write your code to fit within 80 columns of text.  This helps those of us who
 like to print out code and look at your code in an xterm without resizing
@@ -298,11 +310,11 @@ for debate.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_spacestabs">Use Spaces Instead of Tabs</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>In all cases, prefer spaces to tabs in source files.  People have different
 preferred indentation levels, and different styles of indentation that they
@@ -319,11 +331,11 @@ makes for incredible diffs that are absolutely worthless.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="scf_indentation">Indent Code Consistently</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Okay, in your first year of programming you were told that indentation is
 important.  If you didn't believe and internalize this then, now is the time.
@@ -331,19 +343,21 @@ Just do it.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="compilerissues">Compiler Issues</a>
-</div>
+</h3>
 
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ci_warningerrors">Treat Compiler Warnings Like Errors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>If your code has compiler warnings in it, something is wrong &mdash; you
 aren't casting values correctly, your have "questionable" constructs in your
@@ -393,11 +407,11 @@ be fixed by massaging the code appropriately.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ci_portable_code">Write Portable Code</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>In almost all cases, it is possible and within reason to write completely
 portable code.  If there are cases where it isn't possible to write portable
@@ -412,10 +426,10 @@ libSystem.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
 <a name="ci_rtti_exceptions">Do not use RTTI or Exceptions</a>
-</div>
-<div class="doc_text">
+</h4>
+<div>
 
 <p>In an effort to reduce code and executable size, LLVM does not use RTTI
 (e.g. <tt>dynamic_cast&lt;&gt;</tt>) or exceptions.  These two language features
@@ -433,10 +447,10 @@ than <tt>dynamic_cast&lt;&gt;</tt>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
 <a name="ci_class_struct">Use of <tt>class</tt> and <tt>struct</tt> Keywords</a>
-</div>
-<div class="doc_text">
+</h4>
+<div>
 
 <p>In C++, the <tt>class</tt> and <tt>struct</tt> keywords can be used almost
 interchangeably. The only difference is when they are used to declare a class:
@@ -454,26 +468,32 @@ which case <tt>struct</tt> is allowed.</p>
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="styleissues">Style Issues</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
+<div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="macro">The High-Level Issues</a>
-</div>
+</h3>
 <!-- ======================================================================= -->
 
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_module">A Public Header File <b>is</b> a Module</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>C++ doesn't do too well in the modularity department.  There is no real
 encapsulation or data hiding (unless you use expensive protocol classes), but it
@@ -499,11 +519,11 @@ translation unit.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_dontinclude"><tt>#include</tt> as Little as Possible</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include</tt> hurts compile time performance.  Don't do it unless you
 have to, especially in header files.</p>
@@ -528,11 +548,11 @@ dependencies that you'll find out about later.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_privateheaders">Keep "Internal" Headers Private</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Many modules have a complex implementation that causes them to use more than
 one implementation (<tt>.cpp</tt>) file.  It is often tempting to put the
@@ -549,11 +569,11 @@ class itself. Just make them private (or protected) and all is well.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_earlyexit">Use Early Exits and <tt>continue</tt> to Simplify Code</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>When reading code, keep in mind how much state and how many previous
 decisions have to be remembered by the reader to understand a block of code.
@@ -658,11 +678,11 @@ can be a big understandability win.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_else_after_return">Don't use <tt>else</tt> after a <tt>return</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>For similar reasons above (reduction of indentation and easier reading),
 please do not use '<tt>else</tt>' or '<tt>else if</tt>' after something that
@@ -741,11 +761,11 @@ track of when reading the code.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hl_predicateloops">Turn Predicate Loops into Predicate Functions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>It is very common to write small loops that just compute a boolean value.
 There are a number of ways that people commonly write these, but an example of
@@ -802,20 +822,24 @@ locality.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="micro">The Low-Level Issues</a>
-</div>
+</h3>
 <!-- ======================================================================= -->
 
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="ll_naming">Name Types, Functions, Variables, and Enumerators Properly</a>
-</div>
+<h4>
+  <a name="ll_naming">
+    Name Types, Functions, Variables, and Enumerators Properly
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Poorly-chosen names can mislead the reader and cause bugs. We cannot stress
 enough how important it is to use <em>descriptive</em> names.  Pick names that
@@ -894,11 +918,11 @@ Vehicle MakeVehicle(VehicleType Type) {
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_assert">Assert Liberally</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Use the "<tt>assert</tt>" macro to its fullest.  Check all of your
 preconditions and assumptions, you never know when a bug (not necessarily even
@@ -997,11 +1021,11 @@ assert(NewToSet &amp;&amp; "The value shouldn't be in the set yet");
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_ns_std">Do Not Use '<tt>using namespace std</tt>'</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>In LLVM, we prefer to explicitly prefix all identifiers from the standard
 namespace with an "<tt>std::</tt>" prefix, rather than rely on
@@ -1035,12 +1059,13 @@ use any others.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="ll_virtual_anch">Provide a Virtual Method Anchor for Classes
-  in Headers</a>
-</div>
+<h4>
+  <a name="ll_virtual_anch">
+    Provide a Virtual Method Anchor for Classes in Headers
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>If a class is defined in a header file and has a v-table (either it has 
 virtual methods or it derives from classes with virtual methods), it must 
@@ -1052,11 +1077,11 @@ increasing link times.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_end">Don't evaluate <tt>end()</tt> every time through a loop</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Because C++ doesn't have a standard "<tt>foreach</tt>" loop (though it can be
 emulated with macros and may be coming in C++'0x) we end up writing a lot of
@@ -1114,11 +1139,11 @@ prefer it.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is Forbidden</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The use of <tt>#include &lt;iostream&gt;</tt> in library files is
 hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
@@ -1149,11 +1174,11 @@ the <tt>llvm::MemoryBuffer</tt> API for reading files.</b></p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_raw_ostream">Use <tt>raw_ostream</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM includes a lightweight, simple, and efficient stream implementation
 in <tt>llvm/Support/raw_ostream.h</tt>, which provides all of the common
@@ -1169,11 +1194,11 @@ declarations and constant references to <tt>raw_ostream</tt> instances.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>std::endl</tt> modifier, when used with <tt>iostreams</tt> outputs a
 newline to the output stream specified.  In addition to doing this, however, it
@@ -1191,22 +1216,25 @@ it's better to use a literal <tt>'\n'</tt>.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="nano">Microscopic Details</a>
-</div>
+</h3>
 <!-- ======================================================================= -->
 
+<div>
+
 <p>This section describes preferred low-level formatting guidelines along with
 reasoning on why we prefer them.</p>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="micro_spaceparen">Spaces Before Parentheses</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>We prefer to put a space before an open parenthesis only in control flow
 statements, but not in normal function call expressions and function-like
@@ -1260,11 +1288,11 @@ this misinterpretation.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="micro_preincrement">Prefer Preincrement</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Hard fast rule: Preincrement (<tt>++X</tt>) may be no slower than
 postincrement (<tt>X++</tt>) and could very well be a lot faster than it.  Use
@@ -1280,11 +1308,11 @@ get in the habit of always using preincrement, and you won't have a problem.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="micro_namespaceindent">Namespace Indentation</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 In general, we strive to reduce indentation wherever possible.  This is useful
@@ -1368,11 +1396,11 @@ the contents of the namespace.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="micro_anonns">Anonymous Namespaces</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>After talking about namespaces in general, you may be wondering about
 anonymous namespaces in particular.
@@ -1452,15 +1480,17 @@ namespace just because it was declared there.
 
 </div>
 
+</div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="seealso">See Also</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>A lot of these comments and recommendations have been culled for other
 sources.  Two particularly important books for our work are:</p>
@@ -1491,8 +1521,8 @@ something.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-20 03:03:04 +0100 (Sun, 20 Feb 2011) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod
index 3ccaa63..dbd626c 100644
--- a/docs/CommandGuide/FileCheck.pod
+++ b/docs/CommandGuide/FileCheck.pod
@@ -240,6 +240,6 @@ define two separate CHECK lines that match on the same line.
 
 =head1 AUTHORS
 
-Maintained by The LLVM Team (L<http://llvm.org>).
+Maintained by The LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/bugpoint.pod b/docs/CommandGuide/bugpoint.pod
index 1870a0d..31db62f 100644
--- a/docs/CommandGuide/bugpoint.pod
+++ b/docs/CommandGuide/bugpoint.pod
@@ -21,7 +21,7 @@ distribution.
 
 =head1 OPTIONS
 
-=over 
+=over
 
 =item B<--additional-so> F<library>
 
@@ -87,7 +87,7 @@ mis-management.
 =item B<-find-bugs>
 
 Continually randomize the specified passes and run them on the test program
-until a bug is found or the user kills B<bugpoint>. 
+until a bug is found or the user kills B<bugpoint>.
 
 =item B<-help>
 
@@ -147,6 +147,21 @@ This option defines the command to use with the B<--run-custom> and
 B<--safe-custom> options to execute the bitcode testcase. This can
 be useful for cross-compilation.
 
+=item B<--compile-command> I<command>
+
+This option defines the command to use with the B<--compile-custom>
+option to compile the bitcode testcase. This can be useful for
+testing compiler output without running any link or execute stages. To
+generate a reduced unit test, you may add CHECK directives to the
+testcase and pass the name of an executable compile-command script in this form:
+
+    #!/bin/sh
+    llc "$@"
+    not FileCheck [bugpoint input file].ll < bugpoint-test-program.s
+
+This script will "fail" as long as FileCheck passes. So the result
+will be the minimum bitcode that passes FileCheck.
+
 =item B<--safe-path> I<path>
 
 This option defines the path to the command to execute with the
@@ -166,6 +181,6 @@ L<opt|opt>
 
 =head1 AUTHOR
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index 3c1a9f9..cb5438f 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -3,15 +3,15 @@
 <html>
 <head>
   <title>LLVM Command Guide</title>
-  <link rel="stylesheet" href="/docs/llvm.css" type="text/css">
+  <link rel="stylesheet" href="../llvm.css" type="text/css">
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   LLVM Command Guide
-</div>
+</h1>
 
-<div class="doc_text">
+<div>
 
 <p>These documents are HTML versions of the <a href="man/man1/">man pages</a>
 for all of the LLVM tools.  These pages describe how to use the LLVM commands
@@ -23,12 +23,12 @@ options) arguments to the tool you are interested in.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="basic">Basic Commands</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <ul>
 
@@ -80,12 +80,12 @@ options) arguments to the tool you are interested in.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="frontend">C and C++ Front-end Commands</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <ul>
 
 <li><a href="/cmds/llvmgcc.html"><b>llvm-gcc</b></a> -
@@ -99,13 +99,13 @@ options) arguments to the tool you are interested in.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="debug">Debugging Tools</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
 
-<div class="doc_text">
+<div>
 
 <ul>
 
@@ -123,12 +123,12 @@ options) arguments to the tool you are interested in.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="internal">Internal Tools</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <ul>
 
 <li><a href="/cmds/FileCheck.html"><b>FileCheck</b></a> -
@@ -150,8 +150,8 @@ options) arguments to the tool you are interested in.</p>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-08 01:10:21 +0200 (Wed, 08 Sep 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index 989a5d7..faf4811 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -349,6 +349,6 @@ L<valgrind(1)>
 
 =head1 AUTHOR
 
-Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org>).
+Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
index eb26ec0..50b45c8 100644
--- a/docs/CommandGuide/llc.pod
+++ b/docs/CommandGuide/llc.pod
@@ -196,6 +196,6 @@ L<lli|lli>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/lli.pod b/docs/CommandGuide/lli.pod
index 52a2721..a313a31 100644
--- a/docs/CommandGuide/lli.pod
+++ b/docs/CommandGuide/lli.pod
@@ -214,6 +214,6 @@ L<llc|llc>
 
 =head1 AUTHOR
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-ar.pod b/docs/CommandGuide/llvm-ar.pod
index 63ba43f..a8f01b0 100644
--- a/docs/CommandGuide/llvm-ar.pod
+++ b/docs/CommandGuide/llvm-ar.pod
@@ -401,6 +401,6 @@ L<llvm-ranlib|llvm-ranlib>, ar(1)
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-as.pod b/docs/CommandGuide/llvm-as.pod
index 185c009..cc81887 100644
--- a/docs/CommandGuide/llvm-as.pod
+++ b/docs/CommandGuide/llvm-as.pod
@@ -72,6 +72,6 @@ L<llvm-dis|llvm-dis>, L<gccas|gccas>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-bcanalyzer.pod b/docs/CommandGuide/llvm-bcanalyzer.pod
index b0bc0cd..9c5021b 100644
--- a/docs/CommandGuide/llvm-bcanalyzer.pod
+++ b/docs/CommandGuide/llvm-bcanalyzer.pod
@@ -268,7 +268,7 @@ The number of bytes consumed by instructions in the function.
 
 =item B<Average Instruction Size>
 
-The average number of bytes consumed by the instructions in the funtion. This
+The average number of bytes consumed by the instructions in the function. This
 value is computed by dividing Instruction Size by Instructions.
 
 =item B<Bytes Per Instruction>
@@ -310,6 +310,6 @@ L<llvm-dis|llvm-dis>, L<http://llvm.org/docs/BitCodeFormat.html>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-config.pod b/docs/CommandGuide/llvm-config.pod
index 4e38dae..7d68564 100644
--- a/docs/CommandGuide/llvm-config.pod
+++ b/docs/CommandGuide/llvm-config.pod
@@ -126,6 +126,6 @@ occurs, it will exit with a non-zero value.
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-diff.pod b/docs/CommandGuide/llvm-diff.pod
index c8cfdb3..ffe0b48 100644
--- a/docs/CommandGuide/llvm-diff.pod
+++ b/docs/CommandGuide/llvm-diff.pod
@@ -48,6 +48,6 @@ massive detected differences in blocks.
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-dis.pod b/docs/CommandGuide/llvm-dis.pod
index 5b2f4ef..9f4026c 100644
--- a/docs/CommandGuide/llvm-dis.pod
+++ b/docs/CommandGuide/llvm-dis.pod
@@ -55,6 +55,6 @@ L<llvm-as|llvm-as>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-extract.pod b/docs/CommandGuide/llvm-extract.pod
index d4baab7..797e79d 100644
--- a/docs/CommandGuide/llvm-extract.pod
+++ b/docs/CommandGuide/llvm-extract.pod
@@ -68,6 +68,6 @@ L<bugpoint|bugpoint>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-ld.pod b/docs/CommandGuide/llvm-ld.pod
index 536ab0f..efa9ebd 100644
--- a/docs/CommandGuide/llvm-ld.pod
+++ b/docs/CommandGuide/llvm-ld.pod
@@ -229,6 +229,6 @@ L<llvm-link|llvm-link>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-link.pod b/docs/CommandGuide/llvm-link.pod
index 8d06cc9..1e466a5 100644
--- a/docs/CommandGuide/llvm-link.pod
+++ b/docs/CommandGuide/llvm-link.pod
@@ -74,6 +74,6 @@ L<gccld|gccld>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-nm.pod b/docs/CommandGuide/llvm-nm.pod
index a580d3f..a6dc490 100644
--- a/docs/CommandGuide/llvm-nm.pod
+++ b/docs/CommandGuide/llvm-nm.pod
@@ -117,6 +117,6 @@ L<llvm-dis|llvm-dis>, ar(1), nm(1)
 
 =head1 AUTHOR
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-prof.pod b/docs/CommandGuide/llvm-prof.pod
index 9541b05..4b2e09d 100644
--- a/docs/CommandGuide/llvm-prof.pod
+++ b/docs/CommandGuide/llvm-prof.pod
@@ -52,6 +52,6 @@ information. Otherwise, it exits with zero.
 
 =head1 AUTHOR
 
-B<llvm-prof> is maintained by the LLVM Team (L<http://llvm.org>).
+B<llvm-prof> is maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvm-ranlib.pod b/docs/CommandGuide/llvm-ranlib.pod
index 53cd34b..431bc55 100644
--- a/docs/CommandGuide/llvm-ranlib.pod
+++ b/docs/CommandGuide/llvm-ranlib.pod
@@ -47,6 +47,6 @@ L<llvm-ar|llvm-ar>, ranlib(1)
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod
index d237ca4..95a9e5e 100644
--- a/docs/CommandGuide/llvmc.pod
+++ b/docs/CommandGuide/llvmc.pod
@@ -185,6 +185,6 @@ L<llvm-dis|llvm-dis>, L<llc|llc>, L<llvm-link|llvm-link>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/llvmgcc.pod b/docs/CommandGuide/llvmgcc.pod
index 9892ca7..30af0a06e 100644
--- a/docs/CommandGuide/llvmgcc.pod
+++ b/docs/CommandGuide/llvmgcc.pod
@@ -70,7 +70,7 @@ L<llvm-g++|llvmgxx>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
 
diff --git a/docs/CommandGuide/llvmgxx.pod b/docs/CommandGuide/llvmgxx.pod
index 64b670e..1ea3d49 100644
--- a/docs/CommandGuide/llvmgxx.pod
+++ b/docs/CommandGuide/llvmgxx.pod
@@ -79,7 +79,7 @@ L<llvm-gcc|llvmgcc>
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
 
diff --git a/docs/CommandGuide/opt.pod b/docs/CommandGuide/opt.pod
index d1d1db5..f5f4968 100644
--- a/docs/CommandGuide/opt.pod
+++ b/docs/CommandGuide/opt.pod
@@ -138,6 +138,6 @@ occurs, it will exit with a non-zero value.
 
 =head1 AUTHORS
 
-Maintained by the LLVM Team (L<http://llvm.org>).
+Maintained by the LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandGuide/tblgen.pod b/docs/CommandGuide/tblgen.pod
index d127492..fe1be5e 100644
--- a/docs/CommandGuide/tblgen.pod
+++ b/docs/CommandGuide/tblgen.pod
@@ -110,6 +110,6 @@ occurs, it will exit with a non-zero value.
 
 =head1 AUTHORS
 
-Maintained by The LLVM Team (L<http://llvm.org>).
+Maintained by The LLVM Team (L<http://llvm.org/>).
 
 =cut
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
index 2e5b3a2..7535ca4 100644
--- a/docs/CommandLine.html
+++ b/docs/CommandLine.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   CommandLine 2.0 Library Manual
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a></li>
@@ -100,12 +100,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document describes the CommandLine argument processing library.  It will
 show you how to use it, and what it can do.  The CommandLine library uses a
@@ -184,12 +184,12 @@ href="mailto:sabre@nondot.org">Chris Lattner</a>.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="quickstart">Quick Start Guide</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section of the manual runs through a simple CommandLine'ification of a
 basic compiler tool.  This is intended to show you how to jump into using the
@@ -231,11 +231,11 @@ represented like this:</p>
 <a href="#cl::opt">cl::opt</a>&lt;string&gt; OutputFilename("<i>o</i>", <a href="#cl::desc">cl::desc</a>("<i>Specify output filename</i>"), <a href="#cl::value_desc">cl::value_desc</a>("<i>filename</i>"));
 </pre></div>
 
-<p>This declares a global variable "<tt>OutputFilename</tt>" that is used to
-capture the result of the "<tt>o</tt>" argument (first parameter).  We specify
-that this is a simple scalar option by using the "<tt><a
-href="#cl::opt">cl::opt</a></tt>" template (as opposed to the <a
-href="#list">"<tt>cl::list</tt> template</a>), and tell the CommandLine library
+<p>This declares a global variable &quot;<tt>OutputFilename</tt>&quot; that is used to
+capture the result of the &quot;<tt>o</tt>&quot; argument (first parameter).  We specify
+that this is a simple scalar option by using the &quot;<tt><a
+href="#cl::opt">cl::opt</a></tt>&quot; template (as opposed to the <a
+href="#list">&quot;<tt>cl::list</tt> template</a>), and tell the CommandLine library
 that the data type that we are parsing is a string.</p>
 
 <p>The second and third parameters (which are optional) are used to specify what
@@ -321,14 +321,12 @@ OPTIONS:
 
 <p>... indicating that an input filename is expected.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="bool">Boolean Arguments</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>In addition to input and output filenames, we would like the compiler example
 to support three boolean flags: "<tt>-f</tt>" to force writing binary output to
@@ -406,11 +404,11 @@ and <a href="#list">lists</a> of options.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="alias">Argument Aliases</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>So far, the example works well, except for the fact that we need to check the
 quiet condition like this now:</p>
@@ -456,12 +454,12 @@ uses.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="onealternative">Selecting an alternative from a set of
   possibilities</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>So far we have seen how the CommandLine library handles builtin types like
 <tt>std::string</tt>, <tt>bool</tt> and <tt>int</tt>, but how does it handle
@@ -567,11 +565,11 @@ which is when you would use it.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="namedalternatives">Named Alternatives</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Another useful argument form is a named alternative style.  We shall use this
 style in our compiler to specify different debug levels that can be used.
@@ -629,11 +627,11 @@ that you can choose the form most appropriate for your application.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="list">Parsing a list of options</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have the standard run-of-the-mill argument types out of the way,
 lets get a little wild and crazy.  Lets say that we want our optimizer to accept
@@ -699,11 +697,11 @@ checking we have to do.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="bits">Collecting options as a set of flags</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Instead of collecting sets of options in a list, it is also possible to
 gather information for enum values in a <b>bit vector</b>.  The representation used by
@@ -758,11 +756,11 @@ href="#list"> <tt>cl::list</tt></a> option.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="description">Adding freeform text to help output</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>As our program grows and becomes more mature, we may decide to put summary
 information about what it does into the help output.  The help output is styled
@@ -800,28 +798,27 @@ OPTIONS:
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="referenceguide">Reference Guide</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that you know the basics of how to use the CommandLine library, this
 section will give you the detailed information you need to tune how command line
 options work, as well as information on more "advanced" command line option
 processing capabilities.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="positional">Positional Arguments</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Positional arguments are those arguments that are not named, and are not
 specified with a hyphen.  Positional arguments should be used when an option is
@@ -854,15 +851,12 @@ that command line options will be ordered according to how they are listed in a
 are defined in multiple .cpp files.  The fix for this problem is simply to
 define all of your positional arguments in one .cpp file.</p>
 
-</div>
-
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="--">Specifying positional options with hyphens</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Sometimes you may want to specify a value to your positional argument that
 starts with a hyphen (for example, searching for '<tt>-foo</tt>' in a file).  At
@@ -895,10 +889,10 @@ can use it like this:</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="getPosition">Determining absolute position with getPosition()</a>
-</div>
-<div class="doc_text">
+</h4>
+<div>
   <p>Sometimes an option can affect or modify the meaning of another option. For
   example, consider <tt>gcc</tt>'s <tt>-x LANG</tt> option. This tells
   <tt>gcc</tt> to ignore the suffix of subsequent positional arguments and force
@@ -954,11 +948,11 @@ can use it like this:</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::ConsumeAfter">The <tt>cl::ConsumeAfter</tt> modifier</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::ConsumeAfter</tt> <a href="#formatting">formatting option</a> is
 used to construct programs that use "interpreter style" option processing.  With
@@ -1006,12 +1000,14 @@ href="#cl::list">cl::list</a> option.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="storage">Internal vs External Storage</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>By default, all command line options automatically hold the value that they
 parse from the command line.  This is very convenient in the common case,
@@ -1076,11 +1072,11 @@ that <tt>DebugFlag</tt> is automatically set.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="attributes">Option Attributes</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This section describes the basic attributes that you can specify on
 options.</p>
@@ -1166,11 +1162,11 @@ obviously).</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="modifiers">Option Modifiers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Option modifiers are the flags and expressions that you pass into the
 constructors for <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
@@ -1196,14 +1192,12 @@ category.  The CommandLine library specifies defaults for all of these settings
 that are the most useful in practice and the most common, which mean that you
 usually shouldn't have to worry about these.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="hiding">Hiding an option from <tt>-help</tt> output</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::NotHidden</tt>, <tt>cl::Hidden</tt>, and
 <tt>cl::ReallyHidden</tt> modifiers are used to control whether or not an option
@@ -1230,12 +1224,12 @@ indicates that the option should not appear in any help output.</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="numoccurrences">Controlling the number of occurrences required and
   allowed</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>This group of options is used to control how many time an option is allowed
 (or required) to be specified on the command line of your program.  Specifying a
@@ -1279,11 +1273,11 @@ retained.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="valrequired">Controlling whether or not a value must be specified</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>This group of options is used to control whether or not the option allows a
 value to be present.  In the case of the CommandLine library, a value is either
@@ -1328,11 +1322,11 @@ when <a href="#extensionguide">extending the library</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="formatting">Controlling other formatting options</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The formatting option group is used to specify that the command line option
 has special abilities and is otherwise different from other command line
@@ -1409,11 +1403,11 @@ strategy basically looks like this:</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="misc">Miscellaneous option modifiers</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The miscellaneous option modifiers are the only flags where you can specify
 more than one flag from the set: they are not mutually exclusive.  These flags
@@ -1453,11 +1447,11 @@ only makes sense with a <a href="#cl::list">cl::list</a> option.</li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="response">Response files</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Some systems, such as certain variants of Microsoft Windows and
 some older Unices have a relatively low limit on command-line
@@ -1474,13 +1468,14 @@ and
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="toplevel">Top-Level Classes and Functions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Despite all of the built-in flexibility, the CommandLine option library
 really only consists of one function (<a
@@ -1490,15 +1485,13 @@ href="#cl::list"><tt>cl::list</tt></a>, and <a
 href="#cl::alias"><tt>cl::alias</tt></a>.  This section describes these three
 classes in detail.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::ParseCommandLineOptions">The <tt>cl::ParseCommandLineOptions</tt>
   function</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::ParseCommandLineOptions</tt> function is designed to be called
 directly from <tt>main</tt>, and is used to fill in the values of all of the
@@ -1514,12 +1507,12 @@ which holds <a href="#description">additional extra text</a> to emit when the
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::ParseEnvironmentOptions">The <tt>cl::ParseEnvironmentOptions</tt>
   function</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::ParseEnvironmentOptions</tt> function has mostly the same effects
 as <a
@@ -1551,12 +1544,12 @@ input.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::SetVersionPrinter">The <tt>cl::SetVersionPrinter</tt>
   function</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::SetVersionPrinter</tt> function is designed to be called
 directly from <tt>main</tt> and <i>before</i>
@@ -1572,11 +1565,11 @@ called when the <tt>--version</tt> option is given by the user.</p>
 
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::opt">The <tt>cl::opt</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::opt</tt> class is the class used to represent scalar command line
 options, and is the one used most of the time.  It is a templated class which
@@ -1607,11 +1600,11 @@ href="#customparser">custom parser</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::list">The <tt>cl::list</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::list</tt> class is the class used to represent a list of command
 line options.  It too is a templated class which can take up to three
@@ -1634,11 +1627,11 @@ be used.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::bits">The <tt>cl::bits</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::bits</tt> class is the class used to represent a list of command
 line options in the form of a bit vector.  It is also a templated class which
@@ -1659,11 +1652,11 @@ must be of <b>type</b> <tt>unsigned</tt> if external storage is used.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::alias">The <tt>cl::alias</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::alias</tt> class is a nontemplated class that is used to form
 aliases for other arguments.</p>
@@ -1682,11 +1675,11 @@ the conversion from string to data.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="cl::extrahelp">The <tt>cl::extrahelp</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>cl::extrahelp</tt> class is a nontemplated class that allows extra
 help text to be printed out for the <tt>-help</tt> option.</p>
@@ -1709,12 +1702,14 @@ single <tt>cl::extrahelp</tt> instance.</p>
 </pre></div>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="builtinparsers">Builtin parsers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Parsers control how the string value taken from the command line is
 translated into a typed value, suitable for use in a C++ program.  By default,
@@ -1773,27 +1768,27 @@ exponential notation (ex: <tt>1.7e15</tt>) and properly supports locales.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="extensionguide">Extension Guide</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Although the CommandLine library has a lot of functionality built into it
 already (as discussed previously), one of its true strengths lie in its
 extensibility.  This section discusses how the CommandLine library works under
 the covers and illustrates how to do some simple, common, extensions.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="customparser">Writing a custom parser</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>One of the simplest and most common extensions is the use of a custom parser.
 As <a href="#builtinparsers">discussed previously</a>, parsers are the portion
@@ -1932,11 +1927,11 @@ tutorial.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="explotingexternal">Exploiting external storage</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>Several of the LLVM libraries define static <tt>cl::opt</tt> instances that
   will automatically be included in any program that links with that library.
   This is a feature. However, sometimes it is necessary to know the value of the
@@ -1951,16 +1946,18 @@ tutorial.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="dynamicopts">Dynamically adding command line options</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>TODO: fill in this section</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -1971,8 +1968,8 @@ tutorial.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index c63e174..03db3a0 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
+<meta name="generator" content="Docutils 0.6: http://docutils.sourceforge.net/" />
 <title>Customizing LLVMC: Reference Manual</title>
 <link rel="stylesheet" href="llvm.css" type="text/css" />
 </head>
@@ -17,28 +17,23 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <div class="contents topic" id="contents">
 <p class="topic-title first">Contents</p>
 <ul class="simple">
-<li><a class="reference internal" href="#introduction" id="id8">Introduction</a></li>
-<li><a class="reference internal" href="#compiling-with-llvmc" id="id9">Compiling with LLVMC</a></li>
-<li><a class="reference internal" href="#predefined-options" id="id10">Predefined options</a></li>
-<li><a class="reference internal" href="#compiling-llvmc-plugins" id="id11">Compiling LLVMC plugins</a></li>
-<li><a class="reference internal" href="#compiling-standalone-llvmc-based-drivers" id="id12">Compiling standalone LLVMC-based drivers</a></li>
-<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id13">Customizing LLVMC: the compilation graph</a></li>
-<li><a class="reference internal" href="#describing-options" id="id14">Describing options</a><ul>
-<li><a class="reference internal" href="#external-options" id="id15">External options</a></li>
+<li><a class="reference internal" href="#introduction" id="id7">Introduction</a></li>
+<li><a class="reference internal" href="#compiling-with-llvmc" id="id8">Compiling with <tt class="docutils literal">llvmc</tt></a></li>
+<li><a class="reference internal" href="#predefined-options" id="id9">Predefined options</a></li>
+<li><a class="reference internal" href="#compiling-llvmc-based-drivers" id="id10">Compiling LLVMC-based drivers</a></li>
+<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id11">Customizing LLVMC: the compilation graph</a></li>
+<li><a class="reference internal" href="#describing-options" id="id12">Describing options</a></li>
+<li><a class="reference internal" href="#conditional-evaluation" id="id13">Conditional evaluation</a></li>
+<li><a class="reference internal" href="#writing-a-tool-description" id="id14">Writing a tool description</a><ul>
+<li><a class="reference internal" href="#id4" id="id15">Actions</a></li>
 </ul>
 </li>
-<li><a class="reference internal" href="#conditional-evaluation" id="id16">Conditional evaluation</a></li>
-<li><a class="reference internal" href="#writing-a-tool-description" id="id17">Writing a tool description</a><ul>
-<li><a class="reference internal" href="#id5" id="id18">Actions</a></li>
-</ul>
-</li>
-<li><a class="reference internal" href="#language-map" id="id19">Language map</a></li>
-<li><a class="reference internal" href="#option-preprocessor" id="id20">Option preprocessor</a></li>
-<li><a class="reference internal" href="#more-advanced-topics" id="id21">More advanced topics</a><ul>
-<li><a class="reference internal" href="#hooks-and-environment-variables" id="id22">Hooks and environment variables</a></li>
-<li><a class="reference internal" href="#how-plugins-are-loaded" id="id23">How plugins are loaded</a></li>
-<li><a class="reference internal" href="#debugging" id="id24">Debugging</a></li>
-<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id25">Conditioning on the executable name</a></li>
+<li><a class="reference internal" href="#language-map" id="id16">Language map</a></li>
+<li><a class="reference internal" href="#option-preprocessor" id="id17">Option preprocessor</a></li>
+<li><a class="reference internal" href="#more-advanced-topics" id="id18">More advanced topics</a><ul>
+<li><a class="reference internal" href="#hooks-and-environment-variables" id="id19">Hooks and environment variables</a></li>
+<li><a class="reference internal" href="#debugging" id="id20">Debugging</a></li>
+<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id21">Conditioning on the executable name</a></li>
 </ul>
 </li>
 </ul>
@@ -46,25 +41,24 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <div class="doc_author">
 <p>Written by <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a></p>
 </div><div class="section" id="introduction">
-<h1><a class="toc-backref" href="#id8">Introduction</a></h1>
+<h1><a class="toc-backref" href="#id7">Introduction</a></h1>
 <p>LLVMC is a generic compiler driver, designed to be customizable and
-extensible. It plays the same role for LLVM as the <tt class="docutils literal"><span class="pre">gcc</span></tt> program
-does for GCC - LLVMC's job is essentially to transform a set of input
-files into a set of targets depending on configuration rules and user
-options. What makes LLVMC different is that these transformation rules
-are completely customizable - in fact, LLVMC knows nothing about the
-specifics of transformation (even the command-line options are mostly
-not hard-coded) and regards the transformation structure as an
-abstract graph. The structure of this graph is completely determined
-by plugins, which can be either statically or dynamically linked. This
-makes it possible to easily adapt LLVMC for other purposes - for
-example, as a build tool for game resources.</p>
+extensible. It plays the same role for LLVM as the <tt class="docutils literal">gcc</tt> program does for
+GCC - LLVMC's job is essentially to transform a set of input files into a set of
+targets depending on configuration rules and user options. What makes LLVMC
+different is that these transformation rules are completely customizable - in
+fact, LLVMC knows nothing about the specifics of transformation (even the
+command-line options are mostly not hard-coded) and regards the transformation
+structure as an abstract graph. The structure of this graph is described in
+high-level TableGen code, from which an efficient C++ representation is
+automatically derived. This makes it possible to adapt LLVMC for other
+purposes - for example, as a build tool for game resources.</p>
 <p>Because LLVMC employs <a class="reference external" href="http://llvm.org/docs/TableGenFundamentals.html">TableGen</a> as its configuration language, you
 need to be familiar with it to customize LLVMC.</p>
 </div>
 <div class="section" id="compiling-with-llvmc">
-<h1><a class="toc-backref" href="#id9">Compiling with LLVMC</a></h1>
-<p>LLVMC tries hard to be as compatible with <tt class="docutils literal"><span class="pre">gcc</span></tt> as possible,
+<h1><a class="toc-backref" href="#id8">Compiling with <tt class="docutils literal">llvmc</tt></a></h1>
+<p>LLVMC tries hard to be as compatible with <tt class="docutils literal">gcc</tt> as possible,
 although there are some small differences. Most of the time, however,
 you shouldn't be able to notice them:</p>
 <pre class="literal-block">
@@ -74,11 +68,11 @@ $ ./a.out
 hello
 </pre>
 <p>One nice feature of LLVMC is that one doesn't have to distinguish between
-different compilers for different languages (think <tt class="docutils literal"><span class="pre">g++</span></tt> vs.  <tt class="docutils literal"><span class="pre">gcc</span></tt>) - the
+different compilers for different languages (think <tt class="docutils literal">g++</tt> vs.  <tt class="docutils literal">gcc</tt>) - the
 right toolchain is chosen automatically based on input language names (which
 are, in turn, determined from file extensions). If you want to force files
 ending with &quot;.c&quot; to compile as C++, use the <tt class="docutils literal"><span class="pre">-x</span></tt> option, just like you would
-do it with <tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
+do it with <tt class="docutils literal">gcc</tt>:</p>
 <pre class="literal-block">
 $ # hello.c is really a C++ file
 $ llvmc -x c++ hello.c
@@ -97,138 +91,100 @@ $ ./a.out
 hello
 </pre>
 <p>By default, LLVMC uses <tt class="docutils literal"><span class="pre">llvm-gcc</span></tt> to compile the source code. It is also
-possible to choose the <tt class="docutils literal"><span class="pre">clang</span></tt> compiler with the <tt class="docutils literal"><span class="pre">-clang</span></tt> option.</p>
+possible to choose the <tt class="docutils literal">clang</tt> compiler with the <tt class="docutils literal"><span class="pre">-clang</span></tt> option.</p>
 </div>
 <div class="section" id="predefined-options">
-<h1><a class="toc-backref" href="#id10">Predefined options</a></h1>
-<p>LLVMC has some built-in options that can't be overridden in the
-configuration libraries:</p>
+<h1><a class="toc-backref" href="#id9">Predefined options</a></h1>
+<p>LLVMC has some built-in options that can't be overridden in the TableGen code:</p>
 <ul class="simple">
-<li><tt class="docutils literal"><span class="pre">-o</span> <span class="pre">FILE</span></tt> - Output file name.</li>
-<li><tt class="docutils literal"><span class="pre">-x</span> <span class="pre">LANGUAGE</span></tt> - Specify the language of the following input files
+<li><tt class="docutils literal"><span class="pre">-o</span> FILE</tt> - Output file name.</li>
+<li><tt class="docutils literal"><span class="pre">-x</span> LANGUAGE</tt> - Specify the language of the following input files
 until the next -x option.</li>
-<li><tt class="docutils literal"><span class="pre">-load</span> <span class="pre">PLUGIN_NAME</span></tt> - Load the specified plugin DLL. Example:
-<tt class="docutils literal"><span class="pre">-load</span> <span class="pre">$LLVM_DIR/Release/lib/LLVMCSimple.so</span></tt>.</li>
 <li><tt class="docutils literal"><span class="pre">-v</span></tt> - Enable verbose mode, i.e. print out all executed commands.</li>
 <li><tt class="docutils literal"><span class="pre">--save-temps</span></tt> - Write temporary files to the current directory and do not
 delete them on exit. This option can also take an argument: the
 <tt class="docutils literal"><span class="pre">--save-temps=obj</span></tt> switch will write files into the directory specified with
 the <tt class="docutils literal"><span class="pre">-o</span></tt> option. The <tt class="docutils literal"><span class="pre">--save-temps=cwd</span></tt> and <tt class="docutils literal"><span class="pre">--save-temps</span></tt> switches are
 both synonyms for the default behaviour.</li>
-<li><tt class="docutils literal"><span class="pre">--temp-dir</span> <span class="pre">DIRECTORY</span></tt> - Store temporary files in the given directory. This
+<li><tt class="docutils literal"><span class="pre">--temp-dir</span> DIRECTORY</tt> - Store temporary files in the given directory. This
 directory is deleted on exit unless <tt class="docutils literal"><span class="pre">--save-temps</span></tt> is specified. If
 <tt class="docutils literal"><span class="pre">--save-temps=obj</span></tt> is also specified, <tt class="docutils literal"><span class="pre">--temp-dir</span></tt> is given the
 precedence.</li>
 <li><tt class="docutils literal"><span class="pre">--check-graph</span></tt> - Check the compilation for common errors like mismatched
-output/input language names, multiple default edges and cycles. Because of
-plugins, these checks can't be performed at compile-time. Exit with code zero
-if no errors were found, and return the number of found errors
-otherwise. Hidden option, useful for debugging LLVMC plugins.</li>
+output/input language names, multiple default edges and cycles. Exit with code
+zero if no errors were found, and return the number of found errors
+otherwise. Hidden option, useful for debugging.</li>
 <li><tt class="docutils literal"><span class="pre">--view-graph</span></tt> - Show a graphical representation of the compilation graph
-and exit. Requires that you have <tt class="docutils literal"><span class="pre">dot</span></tt> and <tt class="docutils literal"><span class="pre">gv</span></tt> programs installed. Hidden
-option, useful for debugging LLVMC plugins.</li>
+and exit. Requires that you have <tt class="docutils literal">dot</tt> and <tt class="docutils literal">gv</tt> programs installed. Hidden
+option, useful for debugging.</li>
 <li><tt class="docutils literal"><span class="pre">--write-graph</span></tt> - Write a <tt class="docutils literal"><span class="pre">compilation-graph.dot</span></tt> file in the current
 directory with the compilation graph description in Graphviz format (identical
 to the file used by the <tt class="docutils literal"><span class="pre">--view-graph</span></tt> option). The <tt class="docutils literal"><span class="pre">-o</span></tt> option can be
-used to set the output file name. Hidden option, useful for debugging LLVMC
-plugins.</li>
-<li><tt class="docutils literal"><span class="pre">-help</span></tt>, <tt class="docutils literal"><span class="pre">-help-hidden</span></tt>, <tt class="docutils literal"><span class="pre">--version</span></tt> - These options have
+used to set the output file name. Hidden option, useful for debugging.</li>
+<li><tt class="docutils literal"><span class="pre">--help</span></tt>, <tt class="docutils literal"><span class="pre">--help-hidden</span></tt>, <tt class="docutils literal"><span class="pre">--version</span></tt> - These options have
 their standard meaning.</li>
 </ul>
 </div>
-<div class="section" id="compiling-llvmc-plugins">
-<h1><a class="toc-backref" href="#id11">Compiling LLVMC plugins</a></h1>
-<p>It's easiest to start working on your own LLVMC plugin by copying the
-skeleton project which lives under <tt class="docutils literal"><span class="pre">$LLVMC_DIR/plugins/Simple</span></tt>:</p>
+<div class="section" id="compiling-llvmc-based-drivers">
+<h1><a class="toc-backref" href="#id10">Compiling LLVMC-based drivers</a></h1>
+<p>It's easiest to start working on your own LLVMC driver by copying the skeleton
+project which lives under <tt class="docutils literal">$LLVMC_DIR/examples/Skeleton</tt>:</p>
 <pre class="literal-block">
-$ cd $LLVMC_DIR/plugins
-$ cp -r Simple MyPlugin
-$ cd MyPlugin
+$ cd $LLVMC_DIR/examples
+$ cp -r Skeleton MyDriver
+$ cd MyDriver
 $ ls
-Makefile PluginMain.cpp Simple.td
-</pre>
-<p>As you can see, our basic plugin consists of only two files (not
-counting the build script). <tt class="docutils literal"><span class="pre">Simple.td</span></tt> contains TableGen
-description of the compilation graph; its format is documented in the
-following sections. <tt class="docutils literal"><span class="pre">PluginMain.cpp</span></tt> is just a helper file used to
-compile the auto-generated C++ code produced from TableGen source. It
-can also contain hook definitions (see <a class="reference internal" href="#hooks">below</a>).</p>
-<p>The first thing that you should do is to change the <tt class="docutils literal"><span class="pre">LLVMC_PLUGIN</span></tt>
-variable in the <tt class="docutils literal"><span class="pre">Makefile</span></tt> to avoid conflicts (since this variable
-is used to name the resulting library):</p>
-<pre class="literal-block">
-LLVMC_PLUGIN=MyPlugin
+AutoGenerated.td  Hooks.cpp  Main.cpp  Makefile
 </pre>
-<p>It is also a good idea to rename <tt class="docutils literal"><span class="pre">Simple.td</span></tt> to something less
-generic:</p>
+<p>As you can see, our basic driver consists of only three files (not counting the
+build script). <tt class="docutils literal">AutoGenerated.td</tt> contains TableGen description of the
+compilation graph; its format is documented in the following
+sections. <tt class="docutils literal">Hooks.cpp</tt> is an empty file that should be used for hook
+definitions (see <a class="reference internal" href="#hooks">below</a>). <tt class="docutils literal">Main.cpp</tt> is just a helper used to compile the
+auto-generated C++ code produced from TableGen source.</p>
+<p>The first thing that you should do is to change the <tt class="docutils literal">LLVMC_BASED_DRIVER</tt>
+variable in the <tt class="docutils literal">Makefile</tt>:</p>
 <pre class="literal-block">
-$ mv Simple.td MyPlugin.td
+LLVMC_BASED_DRIVER=MyDriver
 </pre>
-<p>To build your plugin as a dynamic library, just <tt class="docutils literal"><span class="pre">cd</span></tt> to its source
-directory and run <tt class="docutils literal"><span class="pre">make</span></tt>. The resulting file will be called
-<tt class="docutils literal"><span class="pre">plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION)</span></tt> (in our case,
-<tt class="docutils literal"><span class="pre">plugin_llvmc_MyPlugin.so</span></tt>). This library can be then loaded in with the
-<tt class="docutils literal"><span class="pre">-load</span></tt> option. Example:</p>
-<pre class="literal-block">
-$ cd $LLVMC_DIR/plugins/Simple
-$ make
-$ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
-</pre>
-</div>
-<div class="section" id="compiling-standalone-llvmc-based-drivers">
-<h1><a class="toc-backref" href="#id12">Compiling standalone LLVMC-based drivers</a></h1>
-<p>By default, the <tt class="docutils literal"><span class="pre">llvmc</span></tt> executable consists of a driver core plus several
-statically linked plugins (<tt class="docutils literal"><span class="pre">Base</span></tt> and <tt class="docutils literal"><span class="pre">Clang</span></tt> at the moment). You can
-produce a standalone LLVMC-based driver executable by linking the core with your
-own plugins. The recommended way to do this is by starting with the provided
-<tt class="docutils literal"><span class="pre">Skeleton</span></tt> example (<tt class="docutils literal"><span class="pre">$LLVMC_DIR/example/Skeleton</span></tt>):</p>
+<p>It can also be a good idea to put your TableGen code into a file with a less
+generic name:</p>
 <pre class="literal-block">
-$ cd $LLVMC_DIR/example/
-$ cp -r Skeleton mydriver
-$ cd mydriver
-$ vim Makefile
+$ touch MyDriver.td
+$ vim AutoGenerated.td
 [...]
-$ make
+include &quot;MyDriver.td&quot;
 </pre>
+<p>If you have more than one TableGen source file, they all should be included from
+<tt class="docutils literal">AutoGenerated.td</tt>, since this file is used by the build system to generate
+C++ code.</p>
+<p>To build your driver, just <tt class="docutils literal">cd</tt> to its source directory and run <tt class="docutils literal">make</tt>. The
+resulting executable will be put into <tt class="docutils literal"><span class="pre">$LLVM_OBJ_DIR/$(BuildMode)/bin</span></tt>.</p>
 <p>If you're compiling LLVM with different source and object directories, then you
-must perform the following additional steps before running <tt class="docutils literal"><span class="pre">make</span></tt>:</p>
+must perform the following additional steps before running <tt class="docutils literal">make</tt>:</p>
 <pre class="literal-block">
 # LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
 # LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
-$ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
-  $LLVMC_OBJ_DIR/example/mydriver/
-$ cd $LLVMC_OBJ_DIR/example/mydriver
+$ mkdir $LLVMC_OBJ_DIR/examples/MyDriver/
+$ cp $LLVMC_SRC_DIR/examples/MyDriver/Makefile \
+  $LLVMC_OBJ_DIR/examples/MyDriver/
+$ cd $LLVMC_OBJ_DIR/examples/MyDriver
 $ make
 </pre>
-<p>Another way to do the same thing is by using the following command:</p>
-<pre class="literal-block">
-$ cd $LLVMC_DIR
-$ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
-</pre>
-<p>This works with both srcdir == objdir and srcdir != objdir, but assumes that the
-plugin source directory was placed under <tt class="docutils literal"><span class="pre">$LLVMC_DIR/plugins</span></tt>.</p>
-<p>Sometimes, you will want a 'bare-bones' version of LLVMC that has no
-built-in plugins. It can be compiled with the following command:</p>
-<pre class="literal-block">
-$ cd $LLVMC_DIR
-$ make LLVMC_BUILTIN_PLUGINS=&quot;&quot;
-</pre>
 </div>
 <div class="section" id="customizing-llvmc-the-compilation-graph">
-<h1><a class="toc-backref" href="#id13">Customizing LLVMC: the compilation graph</a></h1>
-<p>Each TableGen configuration file should include the common
-definitions:</p>
+<h1><a class="toc-backref" href="#id11">Customizing LLVMC: the compilation graph</a></h1>
+<p>Each TableGen configuration file should include the common definitions:</p>
 <pre class="literal-block">
 include &quot;llvm/CompilerDriver/Common.td&quot;
 </pre>
-<p>Internally, LLVMC stores information about possible source
-transformations in form of a graph. Nodes in this graph represent
-tools, and edges between two nodes represent a transformation path. A
-special &quot;root&quot; node is used to mark entry points for the
-transformations. LLVMC also assigns a weight to each edge (more on
-this later) to choose between several alternative edges.</p>
-<p>The definition of the compilation graph (see file
-<tt class="docutils literal"><span class="pre">plugins/Base/Base.td</span></tt> for an example) is just a list of edges:</p>
+<p>Internally, LLVMC stores information about possible source transformations in
+form of a graph. Nodes in this graph represent tools, and edges between two
+nodes represent a transformation path. A special &quot;root&quot; node is used to mark
+entry points for the transformations. LLVMC also assigns a weight to each edge
+(more on this later) to choose between several alternative edges.</p>
+<p>The definition of the compilation graph (see file <tt class="docutils literal">llvmc/src/Base.td</tt> for an
+example) is just a list of edges:</p>
 <pre class="literal-block">
 def CompilationGraph : CompilationGraph&lt;[
     Edge&lt;&quot;root&quot;, &quot;llvm_gcc_c&quot;&gt;,
@@ -253,39 +209,33 @@ def CompilationGraph : CompilationGraph&lt;[
 
     ]&gt;;
 </pre>
-<p>As you can see, the edges can be either default or optional, where
-optional edges are differentiated by an additional <tt class="docutils literal"><span class="pre">case</span></tt> expression
-used to calculate the weight of this edge. Notice also that we refer
-to tools via their names (as strings). This makes it possible to add
-edges to an existing compilation graph in plugins without having to
-know about all tool definitions used in the graph.</p>
-<p>The default edges are assigned a weight of 1, and optional edges get a
-weight of 0 + 2*N where N is the number of tests that evaluated to
-true in the <tt class="docutils literal"><span class="pre">case</span></tt> expression. It is also possible to provide an
-integer parameter to <tt class="docutils literal"><span class="pre">inc_weight</span></tt> and <tt class="docutils literal"><span class="pre">dec_weight</span></tt> - in this case,
-the weight is increased (or decreased) by the provided value instead
-of the default 2. It is also possible to change the default weight of
-an optional edge by using the <tt class="docutils literal"><span class="pre">default</span></tt> clause of the <tt class="docutils literal"><span class="pre">case</span></tt>
+<p>As you can see, the edges can be either default or optional, where optional
+edges are differentiated by an additional <tt class="docutils literal">case</tt> expression used to calculate
+the weight of this edge. Notice also that we refer to tools via their names (as
+strings). This makes it possible to add edges to an existing compilation graph
+without having to know about all tool definitions used in the graph.</p>
+<p>The default edges are assigned a weight of 1, and optional edges get a weight of
+0 + 2*N where N is the number of tests that evaluated to true in the <tt class="docutils literal">case</tt>
+expression. It is also possible to provide an integer parameter to
+<tt class="docutils literal">inc_weight</tt> and <tt class="docutils literal">dec_weight</tt> - in this case, the weight is increased (or
+decreased) by the provided value instead of the default 2. Default weight of an
+optional edge can be changed by using the <tt class="docutils literal">default</tt> clause of the <tt class="docutils literal">case</tt>
 construct.</p>
-<p>When passing an input file through the graph, LLVMC picks the edge
-with the maximum weight. To avoid ambiguity, there should be only one
-default edge between two nodes (with the exception of the root node,
-which gets a special treatment - there you are allowed to specify one
-default edge <em>per language</em>).</p>
-<p>When multiple plugins are loaded, their compilation graphs are merged
-together. Since multiple edges that have the same end nodes are not
-allowed (i.e. the graph is not a multigraph), an edge defined in
-several plugins will be replaced by the definition from the plugin
-that was loaded last. Plugin load order can be controlled by using the
-plugin priority feature described above.</p>
-<p>To get a visual representation of the compilation graph (useful for
-debugging), run <tt class="docutils literal"><span class="pre">llvmc</span> <span class="pre">--view-graph</span></tt>. You will need <tt class="docutils literal"><span class="pre">dot</span></tt> and
-<tt class="docutils literal"><span class="pre">gsview</span></tt> installed for this to work properly.</p>
+<p>When passing an input file through the graph, LLVMC picks the edge with the
+maximum weight. To avoid ambiguity, there should be only one default edge
+between two nodes (with the exception of the root node, which gets a special
+treatment - there you are allowed to specify one default edge <em>per language</em>).</p>
+<p>When multiple compilation graphs are defined, they are merged together. Multiple
+edges with the same end nodes are not allowed (i.e. the graph is not a
+multigraph), and will lead to a compile-time error.</p>
+<p>To get a visual representation of the compilation graph (useful for debugging),
+run <tt class="docutils literal">llvmc <span class="pre">--view-graph</span></tt>. You will need <tt class="docutils literal">dot</tt> and <tt class="docutils literal">gsview</tt> installed for
+this to work properly.</p>
 </div>
 <div class="section" id="describing-options">
-<h1><a class="toc-backref" href="#id14">Describing options</a></h1>
-<p>Command-line options that the plugin supports are defined by using an
-<tt class="docutils literal"><span class="pre">OptionList</span></tt>:</p>
+<h1><a class="toc-backref" href="#id12">Describing options</a></h1>
+<p>Command-line options supported by the driver are defined by using an
+<tt class="docutils literal">OptionList</tt>:</p>
 <pre class="literal-block">
 def Options : OptionList&lt;[
 (switch_option &quot;E&quot;, (help &quot;Help string&quot;)),
@@ -293,101 +243,95 @@ def Options : OptionList&lt;[
 ...
 ]&gt;;
 </pre>
-<p>As you can see, the option list is just a list of DAGs, where each DAG
-is an option description consisting of the option name and some
-properties. A plugin can define more than one option list (they are
-all merged together in the end), which can be handy if one wants to
-separate option groups syntactically.</p>
+<p>As you can see, the option list is just a list of DAGs, where each DAG is an
+option description consisting of the option name and some properties. More than
+one option list can be defined (they are all merged together in the end), which
+can be handy if one wants to separate option groups syntactically.</p>
 <ul>
 <li><p class="first">Possible option types:</p>
 <blockquote>
 <ul class="simple">
-<li><tt class="docutils literal"><span class="pre">switch_option</span></tt> - a simple boolean switch without arguments, for example
-<tt class="docutils literal"><span class="pre">-O2</span></tt> or <tt class="docutils literal"><span class="pre">-time</span></tt>. At most one occurrence is allowed.</li>
-<li><tt class="docutils literal"><span class="pre">parameter_option</span></tt> - option that takes one argument, for example
+<li><tt class="docutils literal">switch_option</tt> - a simple boolean switch without arguments, for example
+<tt class="docutils literal"><span class="pre">-O2</span></tt> or <tt class="docutils literal"><span class="pre">-time</span></tt>. At most one occurrence is allowed by default.</li>
+<li><tt class="docutils literal">parameter_option</tt> - option that takes one argument, for example
 <tt class="docutils literal"><span class="pre">-std=c99</span></tt>. It is also allowed to use spaces instead of the equality
-sign: <tt class="docutils literal"><span class="pre">-std</span> <span class="pre">c99</span></tt>. At most one occurrence is allowed.</li>
-<li><tt class="docutils literal"><span class="pre">parameter_list_option</span></tt> - same as the above, but more than one option
-occurence is allowed.</li>
-<li><tt class="docutils literal"><span class="pre">prefix_option</span></tt> - same as the parameter_option, but the option name and
+sign: <tt class="docutils literal"><span class="pre">-std</span> c99</tt>. At most one occurrence is allowed.</li>
+<li><tt class="docutils literal">parameter_list_option</tt> - same as the above, but more than one option
+occurrence is allowed.</li>
+<li><tt class="docutils literal">prefix_option</tt> - same as the parameter_option, but the option name and
 argument do not have to be separated. Example: <tt class="docutils literal"><span class="pre">-ofile</span></tt>. This can be also
-specified as <tt class="docutils literal"><span class="pre">-o</span> <span class="pre">file</span></tt>; however, <tt class="docutils literal"><span class="pre">-o=file</span></tt> will be parsed incorrectly
-(<tt class="docutils literal"><span class="pre">=file</span></tt> will be interpreted as option value). At most one occurrence is
+specified as <tt class="docutils literal"><span class="pre">-o</span> file</tt>; however, <tt class="docutils literal"><span class="pre">-o=file</span></tt> will be parsed incorrectly
+(<tt class="docutils literal">=file</tt> will be interpreted as option value). At most one occurrence is
 allowed.</li>
-<li><tt class="docutils literal"><span class="pre">prefix_list_option</span></tt> - same as the above, but more than one occurence of
+<li><tt class="docutils literal">prefix_list_option</tt> - same as the above, but more than one occurrence of
 the option is allowed; example: <tt class="docutils literal"><span class="pre">-lm</span> <span class="pre">-lpthread</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">alias_option</span></tt> - a special option type for creating aliases. Unlike other
+<li><tt class="docutils literal">alias_option</tt> - a special option type for creating aliases. Unlike other
 option types, aliases are not allowed to have any properties besides the
-aliased option name. Usage example: <tt class="docutils literal"><span class="pre">(alias_option</span> <span class="pre">&quot;preprocess&quot;,</span> <span class="pre">&quot;E&quot;)</span></tt></li>
+aliased option name.
+Usage example: <tt class="docutils literal">(alias_option &quot;preprocess&quot;, &quot;E&quot;)</tt></li>
+<li><tt class="docutils literal">switch_list_option</tt> - like <tt class="docutils literal">switch_option</tt> with the <tt class="docutils literal">zero_or_more</tt>
+property, but remembers how many times the switch was turned on. Useful
+mostly for forwarding. Example: when <tt class="docutils literal"><span class="pre">-foo</span></tt> is a switch option (with the
+<tt class="docutils literal">zero_or_more</tt> property), the command <tt class="docutils literal">driver <span class="pre">-foo</span> <span class="pre">-foo</span></tt> is forwarded
+as <tt class="docutils literal"><span class="pre">some-tool</span> <span class="pre">-foo</span></tt>, but when <tt class="docutils literal"><span class="pre">-foo</span></tt> is a switch list, the same command
+is forwarded as <tt class="docutils literal"><span class="pre">some-tool</span> <span class="pre">-foo</span> <span class="pre">-foo</span></tt>.</li>
 </ul>
 </blockquote>
 </li>
 <li><p class="first">Possible option properties:</p>
 <blockquote>
 <ul class="simple">
-<li><tt class="docutils literal"><span class="pre">help</span></tt> - help string associated with this option. Used for <tt class="docutils literal"><span class="pre">-help</span></tt>
+<li><tt class="docutils literal">help</tt> - help string associated with this option. Used for <tt class="docutils literal"><span class="pre">--help</span></tt>
 output.</li>
-<li><tt class="docutils literal"><span class="pre">required</span></tt> - this option must be specified exactly once (or, in case of
-the list options without the <tt class="docutils literal"><span class="pre">multi_val</span></tt> property, at least
-once). Incompatible with <tt class="docutils literal"><span class="pre">zero_or_one</span></tt> and <tt class="docutils literal"><span class="pre">one_or_more</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">one_or_more</span></tt> - the option must be specified at least one time. Useful
-only for list options in conjunction with <tt class="docutils literal"><span class="pre">multi_val</span></tt>; for ordinary lists
-it is synonymous with <tt class="docutils literal"><span class="pre">required</span></tt>. Incompatible with <tt class="docutils literal"><span class="pre">required</span></tt> and
-<tt class="docutils literal"><span class="pre">zero_or_one</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">optional</span></tt> - the option can be specified zero or one times. Useful only
-for list options in conjunction with <tt class="docutils literal"><span class="pre">multi_val</span></tt>. Incompatible with
-<tt class="docutils literal"><span class="pre">required</span></tt> and <tt class="docutils literal"><span class="pre">one_or_more</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">hidden</span></tt> - the description of this option will not appear in
-the <tt class="docutils literal"><span class="pre">-help</span></tt> output (but will appear in the <tt class="docutils literal"><span class="pre">-help-hidden</span></tt>
+<li><tt class="docutils literal">required</tt> - this option must be specified exactly once (or, in case of
+the list options without the <tt class="docutils literal">multi_val</tt> property, at least
+once). Incompatible with <tt class="docutils literal">optional</tt> and <tt class="docutils literal">one_or_more</tt>.</li>
+<li><tt class="docutils literal">optional</tt> - the option can be specified either zero times or exactly
+once. The default for switch options. Useful only for list options in
+conjunction with <tt class="docutils literal">multi_val</tt>. Incompatible with <tt class="docutils literal">required</tt>,
+<tt class="docutils literal">zero_or_more</tt> and <tt class="docutils literal">one_or_more</tt>.</li>
+<li><tt class="docutils literal">one_or_more</tt> - the option must be specified at least once. Can be useful
+to allow switch options be both obligatory and be specified multiple
+times. For list options is useful only in conjunction with <tt class="docutils literal">multi_val</tt>;
+for ordinary it is synonymous with <tt class="docutils literal">required</tt>. Incompatible with
+<tt class="docutils literal">required</tt>, <tt class="docutils literal">optional</tt> and <tt class="docutils literal">zero_or_more</tt>.</li>
+<li><tt class="docutils literal">zero_or_more</tt> - the option can be specified zero or more times. Useful
+to allow a single switch option to be specified more than
+once. Incompatible with <tt class="docutils literal">required</tt>, <tt class="docutils literal">optional</tt> and <tt class="docutils literal">one_or_more</tt>.</li>
+<li><tt class="docutils literal">hidden</tt> - the description of this option will not appear in
+the <tt class="docutils literal"><span class="pre">--help</span></tt> output (but will appear in the <tt class="docutils literal"><span class="pre">--help-hidden</span></tt>
 output).</li>
-<li><tt class="docutils literal"><span class="pre">really_hidden</span></tt> - the option will not be mentioned in any help
+<li><tt class="docutils literal">really_hidden</tt> - the option will not be mentioned in any help
 output.</li>
-<li><tt class="docutils literal"><span class="pre">comma_separated</span></tt> - Indicates that any commas specified for an option's
+<li><tt class="docutils literal">comma_separated</tt> - Indicates that any commas specified for an option's
 value should be used to split the value up into multiple values for the
 option. This property is valid only for list options. In conjunction with
-<tt class="docutils literal"><span class="pre">forward_value</span></tt> can be used to implement option forwarding in style of
+<tt class="docutils literal">forward_value</tt> can be used to implement option forwarding in style of
 gcc's <tt class="docutils literal"><span class="pre">-Wa,</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">multi_val</span> <span class="pre">n</span></tt> - this option takes <em>n</em> arguments (can be useful in some
-special cases). Usage example: <tt class="docutils literal"><span class="pre">(parameter_list_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(multi_val</span>
-<span class="pre">3))</span></tt>; the command-line syntax is '-foo a b c'. Only list options can have
-this attribute; you can, however, use the <tt class="docutils literal"><span class="pre">one_or_more</span></tt>, <tt class="docutils literal"><span class="pre">optional</span></tt>
-and <tt class="docutils literal"><span class="pre">required</span></tt> properties.</li>
-<li><tt class="docutils literal"><span class="pre">init</span></tt> - this option has a default value, either a string (if it is a
+<li><tt class="docutils literal">multi_val n</tt> - this option takes <em>n</em> arguments (can be useful in some
+special cases). Usage example: <tt class="docutils literal">(parameter_list_option &quot;foo&quot;, (multi_val
+3))</tt>; the command-line syntax is '-foo a b c'. Only list options can have
+this attribute; you can, however, use the <tt class="docutils literal">one_or_more</tt>, <tt class="docutils literal">optional</tt>
+and <tt class="docutils literal">required</tt> properties.</li>
+<li><tt class="docutils literal">init</tt> - this option has a default value, either a string (if it is a
 parameter), or a boolean (if it is a switch; as in C++, boolean constants
-are called <tt class="docutils literal"><span class="pre">true</span></tt> and <tt class="docutils literal"><span class="pre">false</span></tt>). List options can't have <tt class="docutils literal"><span class="pre">init</span></tt>
+are called <tt class="docutils literal">true</tt> and <tt class="docutils literal">false</tt>). List options can't have <tt class="docutils literal">init</tt>
 attribute.
-Usage examples: <tt class="docutils literal"><span class="pre">(switch_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(init</span> <span class="pre">true))</span></tt>; <tt class="docutils literal"><span class="pre">(prefix_option</span>
-<span class="pre">&quot;bar&quot;,</span> <span class="pre">(init</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">extern</span></tt> - this option is defined in some other plugin, see <a class="reference internal" href="#extern">below</a>.</li>
+Usage examples: <tt class="docutils literal">(switch_option &quot;foo&quot;, (init true))</tt>; <tt class="docutils literal">(prefix_option
+&quot;bar&quot;, (init <span class="pre">&quot;baz&quot;))</span></tt>.</li>
 </ul>
 </blockquote>
 </li>
 </ul>
-<div class="section" id="external-options">
-<span id="extern"></span><h2><a class="toc-backref" href="#id15">External options</a></h2>
-<p>Sometimes, when linking several plugins together, one plugin needs to
-access options defined in some other plugin. Because of the way
-options are implemented, such options must be marked as
-<tt class="docutils literal"><span class="pre">extern</span></tt>. This is what the <tt class="docutils literal"><span class="pre">extern</span></tt> option property is
-for. Example:</p>
-<pre class="literal-block">
-...
-(switch_option &quot;E&quot;, (extern))
-...
-</pre>
-<p>If an external option has additional attributes besides 'extern', they are
-ignored. See also the section on plugin <a class="reference internal" href="#priorities">priorities</a>.</p>
-</div>
 </div>
 <div class="section" id="conditional-evaluation">
-<span id="case"></span><h1><a class="toc-backref" href="#id16">Conditional evaluation</a></h1>
-<p>The 'case' construct is the main means by which programmability is
-achieved in LLVMC. It can be used to calculate edge weights, program
-actions and modify the shell commands to be executed. The 'case'
-expression is designed after the similarly-named construct in
-functional languages and takes the form <tt class="docutils literal"><span class="pre">(case</span> <span class="pre">(test_1),</span> <span class="pre">statement_1,</span>
-<span class="pre">(test_2),</span> <span class="pre">statement_2,</span> <span class="pre">...</span> <span class="pre">(test_N),</span> <span class="pre">statement_N)</span></tt>. The statements
-are evaluated only if the corresponding tests evaluate to true.</p>
+<span id="case"></span><h1><a class="toc-backref" href="#id13">Conditional evaluation</a></h1>
+<p>The 'case' construct is the main means by which programmability is achieved in
+LLVMC. It can be used to calculate edge weights, program actions and modify the
+shell commands to be executed. The 'case' expression is designed after the
+similarly-named construct in functional languages and takes the form <tt class="docutils literal">(case
+(test_1), statement_1, (test_2), statement_2, ... (test_N), statement_N)</tt>. The
+statements are evaluated only if the corresponding tests evaluate to true.</p>
 <p>Examples:</p>
 <pre class="literal-block">
 // Edge weight calculation
@@ -410,129 +354,139 @@ are evaluated only if the corresponding tests evaluate to true.</p>
     (switch_on &quot;B&quot;), &quot;cmdline2&quot;,
     (default), &quot;cmdline3&quot;)
 </pre>
-<p>Note the slight difference in 'case' expression handling in contexts
-of edge weights and command line specification - in the second example
-the value of the <tt class="docutils literal"><span class="pre">&quot;B&quot;</span></tt> switch is never checked when switch <tt class="docutils literal"><span class="pre">&quot;A&quot;</span></tt> is
-enabled, and the whole expression always evaluates to <tt class="docutils literal"><span class="pre">&quot;cmdline1&quot;</span></tt> in
-that case.</p>
+<p>Note the slight difference in 'case' expression handling in contexts of edge
+weights and command line specification - in the second example the value of the
+<tt class="docutils literal">&quot;B&quot;</tt> switch is never checked when switch <tt class="docutils literal">&quot;A&quot;</tt> is enabled, and the whole
+expression always evaluates to <tt class="docutils literal">&quot;cmdline1&quot;</tt> in that case.</p>
 <p>Case expressions can also be nested, i.e. the following is legal:</p>
 <pre class="literal-block">
 (case (switch_on &quot;E&quot;), (case (switch_on &quot;o&quot;), ..., (default), ...)
       (default), ...)
 </pre>
-<p>You should, however, try to avoid doing that because it hurts
-readability. It is usually better to split tool descriptions and/or
-use TableGen inheritance instead.</p>
+<p>You should, however, try to avoid doing that because it hurts readability. It is
+usually better to split tool descriptions and/or use TableGen inheritance
+instead.</p>
 <ul class="simple">
 <li>Possible tests are:<ul>
-<li><tt class="docutils literal"><span class="pre">switch_on</span></tt> - Returns true if a given command-line switch is provided by
-the user. Can be given a list as argument, in that case <tt class="docutils literal"><span class="pre">(switch_on</span> <span class="pre">[&quot;foo&quot;,</span>
-<span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(and</span> <span class="pre">(switch_on</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(switch_on</span>
-<span class="pre">&quot;bar&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;baz&quot;))</span></tt>.
-Example: <tt class="docutils literal"><span class="pre">(switch_on</span> <span class="pre">&quot;opt&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">any_switch_on</span></tt> - Given a list of switch options, returns true if any of
+<li><tt class="docutils literal">switch_on</tt> - Returns true if a given command-line switch is provided by
+the user. Can be given multiple arguments, in that case <tt class="docutils literal">(switch_on &quot;foo&quot;,
+&quot;bar&quot;, &quot;baz&quot;)</tt> is equivalent to <tt class="docutils literal">(and (switch_on <span class="pre">&quot;foo&quot;),</span> (switch_on
+<span class="pre">&quot;bar&quot;),</span> (switch_on <span class="pre">&quot;baz&quot;))</span></tt>.
+Example: <tt class="docutils literal">(switch_on &quot;opt&quot;)</tt>.</li>
+<li><tt class="docutils literal">any_switch_on</tt> - Given a number of switch options, returns true if any of
 the switches is turned on.
-Example: <tt class="docutils literal"><span class="pre">(any_switch_on</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(or</span>
-<span class="pre">(switch_on</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(switch_on</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">parameter_equals</span></tt> - Returns true if a command-line parameter equals
-a given value.
-Example: <tt class="docutils literal"><span class="pre">(parameter_equals</span> <span class="pre">&quot;W&quot;,</span> <span class="pre">&quot;all&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">element_in_list</span></tt> - Returns true if a command-line parameter
-list contains a given value.
-Example: <tt class="docutils literal"><span class="pre">(element_in_list</span> <span class="pre">&quot;l&quot;,</span> <span class="pre">&quot;pthread&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">input_languages_contain</span></tt> - Returns true if a given language
+Example: <tt class="docutils literal">(any_switch_on &quot;foo&quot;, &quot;bar&quot;, &quot;baz&quot;)</tt> is equivalent to <tt class="docutils literal">(or
+(switch_on <span class="pre">&quot;foo&quot;),</span> (switch_on <span class="pre">&quot;bar&quot;),</span> (switch_on <span class="pre">&quot;baz&quot;))</span></tt>.</li>
+<li><tt class="docutils literal">parameter_equals</tt> - Returns true if a command-line parameter (first
+argument) equals a given value (second argument).
+Example: <tt class="docutils literal">(parameter_equals &quot;W&quot;, &quot;all&quot;)</tt>.</li>
+<li><tt class="docutils literal">element_in_list</tt> - Returns true if a command-line parameter list (first
+argument) contains a given value (second argument).
+Example: <tt class="docutils literal">(element_in_list &quot;l&quot;, &quot;pthread&quot;)</tt>.</li>
+<li><tt class="docutils literal">input_languages_contain</tt> - Returns true if a given language
 belongs to the current input language set.
-Example: <tt class="docutils literal"><span class="pre">(input_languages_contain</span> <span class="pre">&quot;c++&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">in_language</span></tt> - Evaluates to true if the input file language is equal to
-the argument. At the moment works only with <tt class="docutils literal"><span class="pre">cmd_line</span></tt> and <tt class="docutils literal"><span class="pre">actions</span></tt> (on
+Example: <tt class="docutils literal">(input_languages_contain <span class="pre">&quot;c++&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">in_language</tt> - Evaluates to true if the input file language is equal to
+the argument. At the moment works only with <tt class="docutils literal">command</tt> and <tt class="docutils literal">actions</tt> (on
 non-join nodes).
-Example: <tt class="docutils literal"><span class="pre">(in_language</span> <span class="pre">&quot;c++&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">not_empty</span></tt> - Returns true if a given option (which should be either a
-parameter or a parameter list) is set by the user. Like <tt class="docutils literal"><span class="pre">switch_on</span></tt>, can
-be also given a list as argument.
-Example: <tt class="docutils literal"><span class="pre">(not_empty</span> <span class="pre">&quot;o&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">any_not_empty</span></tt> - Returns true if <tt class="docutils literal"><span class="pre">not_empty</span></tt> returns true for any of
-the options in the list.
-Example: <tt class="docutils literal"><span class="pre">(any_not_empty</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(or</span>
-<span class="pre">(not_empty</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">empty</span></tt> - The opposite of <tt class="docutils literal"><span class="pre">not_empty</span></tt>. Equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(not_empty</span>
-<span class="pre">X))</span></tt>. Provided for convenience. Can be given a list as argument.</li>
-<li><tt class="docutils literal"><span class="pre">any_not_empty</span></tt> - Returns true if <tt class="docutils literal"><span class="pre">not_empty</span></tt> returns true for any of
-the options in the list.
-Example: <tt class="docutils literal"><span class="pre">(any_empty</span> <span class="pre">[&quot;foo&quot;,</span> <span class="pre">&quot;bar&quot;,</span> <span class="pre">&quot;baz&quot;])</span></tt> is equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(and</span>
-<span class="pre">(not_empty</span> <span class="pre">&quot;foo&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;bar&quot;),</span> <span class="pre">(not_empty</span> <span class="pre">&quot;baz&quot;)))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">single_input_file</span></tt> - Returns true if there was only one input file
+Example: <tt class="docutils literal">(in_language <span class="pre">&quot;c++&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">not_empty</tt> - Returns true if a given option (which should be either a
+parameter or a parameter list) is set by the user. Like <tt class="docutils literal">switch_on</tt>, can
+be also given multiple arguments.
+Examples: <tt class="docutils literal">(not_empty &quot;o&quot;)</tt>, <tt class="docutils literal">(not_empty &quot;o&quot;, &quot;l&quot;)</tt>.</li>
+<li><tt class="docutils literal">any_not_empty</tt> - Returns true if <tt class="docutils literal">not_empty</tt> returns true for any of
+the provided options.
+Example: <tt class="docutils literal">(any_not_empty &quot;foo&quot;, &quot;bar&quot;, &quot;baz&quot;)</tt> is equivalent to <tt class="docutils literal">(or
+(not_empty <span class="pre">&quot;foo&quot;),</span> (not_empty <span class="pre">&quot;bar&quot;),</span> (not_empty <span class="pre">&quot;baz&quot;))</span></tt>.</li>
+<li><tt class="docutils literal">empty</tt> - The opposite of <tt class="docutils literal">not_empty</tt>. Equivalent to <tt class="docutils literal">(not (not_empty
+X))</tt>. Can be given multiple arguments.</li>
+<li><tt class="docutils literal">any_not_empty</tt> - Returns true if <tt class="docutils literal">not_empty</tt> returns true for any of
+the provided options.
+Example: <tt class="docutils literal">(any_empty &quot;foo&quot;, &quot;bar&quot;, &quot;baz&quot;)</tt> is equivalent to <tt class="docutils literal">(or
+(not_empty <span class="pre">&quot;foo&quot;),</span> (not_empty <span class="pre">&quot;bar&quot;),</span> (not_empty <span class="pre">&quot;baz&quot;))</span></tt>.</li>
+<li><tt class="docutils literal">single_input_file</tt> - Returns true if there was only one input file
 provided on the command-line. Used without arguments:
-<tt class="docutils literal"><span class="pre">(single_input_file)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">multiple_input_files</span></tt> - Equivalent to <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(single_input_file))</span></tt> (the
+<tt class="docutils literal">(single_input_file)</tt>.</li>
+<li><tt class="docutils literal">multiple_input_files</tt> - Equivalent to <tt class="docutils literal">(not (single_input_file))</tt> (the
 case of zero input files is considered an error).</li>
-<li><tt class="docutils literal"><span class="pre">default</span></tt> - Always evaluates to true. Should always be the last
-test in the <tt class="docutils literal"><span class="pre">case</span></tt> expression.</li>
-<li><tt class="docutils literal"><span class="pre">and</span></tt> - A standard binary logical combinator that returns true iff all of
-its arguments return true. Used like this: <tt class="docutils literal"><span class="pre">(and</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span>
-<span class="pre">...</span> <span class="pre">(testN))</span></tt>. Nesting of <tt class="docutils literal"><span class="pre">and</span></tt> and <tt class="docutils literal"><span class="pre">or</span></tt> is allowed, but not
+<li><tt class="docutils literal">default</tt> - Always evaluates to true. Should always be the last
+test in the <tt class="docutils literal">case</tt> expression.</li>
+<li><tt class="docutils literal">and</tt> - A standard logical combinator that returns true iff all of
+its arguments return true. Used like this: <tt class="docutils literal">(and (test1), (test2),
+... (testN))</tt>. Nesting of <tt class="docutils literal">and</tt> and <tt class="docutils literal">or</tt> is allowed, but not
 encouraged.</li>
-<li><tt class="docutils literal"><span class="pre">or</span></tt> - A binary logical combinator that returns true iff any of its
-arguments returns true. Example: <tt class="docutils literal"><span class="pre">(or</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">not</span></tt> - Standard unary logical combinator that negates its
-argument. Example: <tt class="docutils literal"><span class="pre">(not</span> <span class="pre">(or</span> <span class="pre">(test1),</span> <span class="pre">(test2),</span> <span class="pre">...</span> <span class="pre">(testN)))</span></tt>.</li>
+<li><tt class="docutils literal">or</tt> - A logical combinator that returns true iff any of its arguments
+return true.
+Example: <tt class="docutils literal">(or (test1), (test2), ... (testN))</tt>.</li>
+<li><tt class="docutils literal">not</tt> - Standard unary logical combinator that negates its
+argument.
+Example: <tt class="docutils literal">(not (or (test1), (test2), ... <span class="pre">(testN)))</span></tt>.</li>
 </ul>
 </li>
 </ul>
 </div>
 <div class="section" id="writing-a-tool-description">
-<h1><a class="toc-backref" href="#id17">Writing a tool description</a></h1>
-<p>As was said earlier, nodes in the compilation graph represent tools,
-which are described separately. A tool definition looks like this
-(taken from the <tt class="docutils literal"><span class="pre">include/llvm/CompilerDriver/Tools.td</span></tt> file):</p>
+<h1><a class="toc-backref" href="#id14">Writing a tool description</a></h1>
+<p>As was said earlier, nodes in the compilation graph represent tools, which are
+described separately. A tool definition looks like this (taken from the
+<tt class="docutils literal">llvmc/src/Base.td</tt> file):</p>
 <pre class="literal-block">
 def llvm_gcc_cpp : Tool&lt;[
     (in_language &quot;c++&quot;),
     (out_language &quot;llvm-assembler&quot;),
     (output_suffix &quot;bc&quot;),
-    (cmd_line &quot;llvm-g++ -c $INFILE -o $OUTFILE -emit-llvm&quot;),
+    (command &quot;llvm-g++ -c -emit-llvm&quot;),
     (sink)
     ]&gt;;
 </pre>
-<p>This defines a new tool called <tt class="docutils literal"><span class="pre">llvm_gcc_cpp</span></tt>, which is an alias for
-<tt class="docutils literal"><span class="pre">llvm-g++</span></tt>. As you can see, a tool definition is just a list of
-properties; most of them should be self-explanatory. The <tt class="docutils literal"><span class="pre">sink</span></tt>
-property means that this tool should be passed all command-line
-options that aren't mentioned in the option list.</p>
+<p>This defines a new tool called <tt class="docutils literal">llvm_gcc_cpp</tt>, which is an alias for
+<tt class="docutils literal"><span class="pre">llvm-g++</span></tt>. As you can see, a tool definition is just a list of properties;
+most of them should be self-explanatory. The <tt class="docutils literal">sink</tt> property means that this
+tool should be passed all command-line options that aren't mentioned in the
+option list.</p>
 <p>The complete list of all currently implemented tool properties follows.</p>
 <ul class="simple">
 <li>Possible tool properties:<ul>
-<li><tt class="docutils literal"><span class="pre">in_language</span></tt> - input language name. Can be either a string or a
-list, in case the tool supports multiple input languages.</li>
-<li><tt class="docutils literal"><span class="pre">out_language</span></tt> - output language name. Multiple output languages are not
-allowed.</li>
-<li><tt class="docutils literal"><span class="pre">output_suffix</span></tt> - output file suffix. Can also be changed
-dynamically, see documentation on actions.</li>
-<li><tt class="docutils literal"><span class="pre">cmd_line</span></tt> - the actual command used to run the tool. You can
-use <tt class="docutils literal"><span class="pre">$INFILE</span></tt> and <tt class="docutils literal"><span class="pre">$OUTFILE</span></tt> variables, output redirection
-with <tt class="docutils literal"><span class="pre">&gt;</span></tt>, hook invocations (<tt class="docutils literal"><span class="pre">$CALL</span></tt>), environment variables
-(via <tt class="docutils literal"><span class="pre">$ENV</span></tt>) and the <tt class="docutils literal"><span class="pre">case</span></tt> construct.</li>
-<li><tt class="docutils literal"><span class="pre">join</span></tt> - this tool is a &quot;join node&quot; in the graph, i.e. it gets a
-list of input files and joins them together. Used for linkers.</li>
-<li><tt class="docutils literal"><span class="pre">sink</span></tt> - all command-line options that are not handled by other
-tools are passed to this tool.</li>
-<li><tt class="docutils literal"><span class="pre">actions</span></tt> - A single big <tt class="docutils literal"><span class="pre">case</span></tt> expression that specifies how
-this tool reacts on command-line options (described in more detail
-<a class="reference internal" href="#actions">below</a>).</li>
+<li><tt class="docutils literal">in_language</tt> - input language name. Can be given multiple arguments, in
+case the tool supports multiple input languages. Used for typechecking and
+mapping file extensions to tools.</li>
+<li><tt class="docutils literal">out_language</tt> - output language name. Multiple output languages are
+allowed. Used for typechecking the compilation graph.</li>
+<li><tt class="docutils literal">output_suffix</tt> - output file suffix. Can also be changed dynamically, see
+documentation on <a class="reference internal" href="#actions">actions</a>.</li>
 </ul>
 </li>
 </ul>
-<div class="section" id="id5">
-<span id="actions"></span><h2><a class="toc-backref" href="#id18">Actions</a></h2>
-<p>A tool often needs to react to command-line options, and this is
-precisely what the <tt class="docutils literal"><span class="pre">actions</span></tt> property is for. The next example
-illustrates this feature:</p>
+<blockquote>
+<ul class="simple">
+<li><tt class="docutils literal">command</tt> - the actual command used to run the tool. You can use output
+redirection with <tt class="docutils literal">&gt;</tt>, hook invocations (<tt class="docutils literal">$CALL</tt>), environment variables
+(via <tt class="docutils literal">$ENV</tt>) and the <tt class="docutils literal">case</tt> construct.</li>
+<li><tt class="docutils literal">join</tt> - this tool is a &quot;join node&quot; in the graph, i.e. it gets a list of
+input files and joins them together. Used for linkers.</li>
+<li><tt class="docutils literal">sink</tt> - all command-line options that are not handled by other tools are
+passed to this tool.</li>
+<li><tt class="docutils literal">actions</tt> - A single big <tt class="docutils literal">case</tt> expression that specifies how this tool
+reacts on command-line options (described in more detail <a class="reference internal" href="#actions">below</a>).</li>
+</ul>
+</blockquote>
+<blockquote>
+<ul class="simple">
+<li><tt class="docutils literal">out_file_option</tt>, <tt class="docutils literal">in_file_option</tt> - Options appended to the
+<tt class="docutils literal">command</tt> string to designate output and input files. Default values are
+<tt class="docutils literal"><span class="pre">&quot;-o&quot;</span></tt> and <tt class="docutils literal">&quot;&quot;</tt>, respectively.</li>
+</ul>
+</blockquote>
+<div class="section" id="id4">
+<span id="actions"></span><h2><a class="toc-backref" href="#id15">Actions</a></h2>
+<p>A tool often needs to react to command-line options, and this is precisely what
+the <tt class="docutils literal">actions</tt> property is for. The next example illustrates this feature:</p>
 <pre class="literal-block">
 def llvm_gcc_linker : Tool&lt;[
     (in_language &quot;object-code&quot;),
     (out_language &quot;executable&quot;),
     (output_suffix &quot;out&quot;),
-    (cmd_line &quot;llvm-gcc $INFILE -o $OUTFILE&quot;),
+    (command &quot;llvm-gcc&quot;),
     (join),
     (actions (case (not_empty &quot;L&quot;), (forward &quot;L&quot;),
                    (not_empty &quot;l&quot;), (forward &quot;l&quot;),
@@ -540,47 +494,46 @@ def llvm_gcc_linker : Tool&lt;[
                              [(append_cmd &quot;-dummy1&quot;), (append_cmd &quot;-dummy2&quot;)])
     ]&gt;;
 </pre>
-<p>The <tt class="docutils literal"><span class="pre">actions</span></tt> tool property is implemented on top of the omnipresent
-<tt class="docutils literal"><span class="pre">case</span></tt> expression. It associates one or more different <em>actions</em>
-with given conditions - in the example, the actions are <tt class="docutils literal"><span class="pre">forward</span></tt>,
-which forwards a given option unchanged, and <tt class="docutils literal"><span class="pre">append_cmd</span></tt>, which
-appends a given string to the tool execution command. Multiple actions
-can be associated with a single condition by using a list of actions
-(used in the example to append some dummy options). The same <tt class="docutils literal"><span class="pre">case</span></tt>
-construct can also be used in the <tt class="docutils literal"><span class="pre">cmd_line</span></tt> property to modify the
-tool command line.</p>
-<p>The &quot;join&quot; property used in the example means that this tool behaves
-like a linker.</p>
+<p>The <tt class="docutils literal">actions</tt> tool property is implemented on top of the omnipresent <tt class="docutils literal">case</tt>
+expression. It associates one or more different <em>actions</em> with given
+conditions - in the example, the actions are <tt class="docutils literal">forward</tt>, which forwards a given
+option unchanged, and <tt class="docutils literal">append_cmd</tt>, which appends a given string to the tool
+execution command. Multiple actions can be associated with a single condition by
+using a list of actions (used in the example to append some dummy options). The
+same <tt class="docutils literal">case</tt> construct can also be used in the <tt class="docutils literal">cmd_line</tt> property to modify
+the tool command line.</p>
+<p>The &quot;join&quot; property used in the example means that this tool behaves like a
+linker.</p>
 <p>The list of all possible actions follows.</p>
 <ul>
 <li><p class="first">Possible actions:</p>
 <blockquote>
 <ul class="simple">
-<li><tt class="docutils literal"><span class="pre">append_cmd</span></tt> - Append a string to the tool invocation command.
-Example: <tt class="docutils literal"><span class="pre">(case</span> <span class="pre">(switch_on</span> <span class="pre">&quot;pthread&quot;),</span> <span class="pre">(append_cmd</span> <span class="pre">&quot;-lpthread&quot;))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">error</span></tt> - Exit with error.
-Example: <tt class="docutils literal"><span class="pre">(error</span> <span class="pre">&quot;Mixing</span> <span class="pre">-c</span> <span class="pre">and</span> <span class="pre">-S</span> <span class="pre">is</span> <span class="pre">not</span> <span class="pre">allowed!&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">warning</span></tt> - Print a warning.
-Example: <tt class="docutils literal"><span class="pre">(warning</span> <span class="pre">&quot;Specifying</span> <span class="pre">both</span> <span class="pre">-O1</span> <span class="pre">and</span> <span class="pre">-O2</span> <span class="pre">is</span> <span class="pre">meaningless!&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward</span></tt> - Forward the option unchanged.
-Example: <tt class="docutils literal"><span class="pre">(forward</span> <span class="pre">&quot;Wall&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward_as</span></tt> - Change the option's name, but forward the argument
+<li><tt class="docutils literal">append_cmd</tt> - Append a string to the tool invocation command.
+Example: <tt class="docutils literal">(case (switch_on <span class="pre">&quot;pthread&quot;),</span> (append_cmd <span class="pre">&quot;-lpthread&quot;))</span></tt>.</li>
+<li><tt class="docutils literal">error</tt> - Exit with error.
+Example: <tt class="docutils literal">(error &quot;Mixing <span class="pre">-c</span> and <span class="pre">-S</span> is not <span class="pre">allowed!&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">warning</tt> - Print a warning.
+Example: <tt class="docutils literal">(warning &quot;Specifying both <span class="pre">-O1</span> and <span class="pre">-O2</span> is <span class="pre">meaningless!&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">forward</tt> - Forward the option unchanged.
+Example: <tt class="docutils literal">(forward &quot;Wall&quot;)</tt>.</li>
+<li><tt class="docutils literal">forward_as</tt> - Change the option's name, but forward the argument
 unchanged.
-Example: <tt class="docutils literal"><span class="pre">(forward_as</span> <span class="pre">&quot;O0&quot;,</span> <span class="pre">&quot;--disable-optimization&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward_value</span></tt> - Forward only option's value. Cannot be used with switch
+Example: <tt class="docutils literal">(forward_as &quot;O0&quot;, <span class="pre">&quot;--disable-optimization&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">forward_value</tt> - Forward only option's value. Cannot be used with switch
 options (since they don't have values), but works fine with lists.
-Example: <tt class="docutils literal"><span class="pre">(forward_value</span> <span class="pre">&quot;Wa,&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward_transformed_value</span></tt> - As above, but applies a hook to the
+Example: <tt class="docutils literal">(forward_value <span class="pre">&quot;Wa,&quot;)</span></tt>.</li>
+<li><tt class="docutils literal">forward_transformed_value</tt> - As above, but applies a hook to the
 option's value before forwarding (see <a class="reference internal" href="#hooks">below</a>). When
-<tt class="docutils literal"><span class="pre">forward_transformed_value</span></tt> is applied to a list
+<tt class="docutils literal">forward_transformed_value</tt> is applied to a list
 option, the hook must have signature
-<tt class="docutils literal"><span class="pre">std::string</span> <span class="pre">hooks::HookName</span> <span class="pre">(const</span> <span class="pre">std::vector&lt;std::string&gt;&amp;)</span></tt>.
-Example: <tt class="docutils literal"><span class="pre">(forward_transformed_value</span> <span class="pre">&quot;m&quot;,</span> <span class="pre">&quot;ConvertToMAttr&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">output_suffix</span></tt> - Modify the output suffix of this tool.
-Example: <tt class="docutils literal"><span class="pre">(output_suffix</span> <span class="pre">&quot;i&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">stop_compilation</span></tt> - Stop compilation after this tool processes its
+<tt class="docutils literal"><span class="pre">std::string</span> <span class="pre">hooks::HookName</span> (const <span class="pre">std::vector&lt;std::string&gt;&amp;)</span></tt>.
+Example: <tt class="docutils literal">(forward_transformed_value &quot;m&quot;, &quot;ConvertToMAttr&quot;)</tt>.</li>
+<li><tt class="docutils literal">output_suffix</tt> - Modify the output suffix of this tool.
+Example: <tt class="docutils literal">(output_suffix &quot;i&quot;)</tt>.</li>
+<li><tt class="docutils literal">stop_compilation</tt> - Stop compilation after this tool processes its
 input. Used without arguments.
-Example: <tt class="docutils literal"><span class="pre">(stop_compilation)</span></tt>.</li>
+Example: <tt class="docutils literal">(stop_compilation)</tt>.</li>
 </ul>
 </blockquote>
 </li>
@@ -588,11 +541,11 @@ Example: <tt class="docutils literal"><span class="pre">(stop_compilation)</span
 </div>
 </div>
 <div class="section" id="language-map">
-<h1><a class="toc-backref" href="#id19">Language map</a></h1>
-<p>If you are adding support for a new language to LLVMC, you'll need to
-modify the language map, which defines mappings from file extensions
-to language names. It is used to choose the proper toolchain(s) for a
-given input file set. Language map definition looks like this:</p>
+<h1><a class="toc-backref" href="#id16">Language map</a></h1>
+<p>If you are adding support for a new language to LLVMC, you'll need to modify the
+language map, which defines mappings from file extensions to language names. It
+is used to choose the proper toolchain(s) for a given input file set. Language
+map definition looks like this:</p>
 <pre class="literal-block">
 def LanguageMap : LanguageMap&lt;
     [LangToSuffixes&lt;&quot;c++&quot;, [&quot;cc&quot;, &quot;cp&quot;, &quot;cxx&quot;, &quot;cpp&quot;, &quot;CPP&quot;, &quot;c++&quot;, &quot;C&quot;]&gt;,
@@ -606,73 +559,69 @@ $ llvmc hello.cpp
 llvmc: Unknown suffix: cpp
 </pre>
 <p>The language map entries are needed only for the tools that are linked from the
-root node. Since a tool can't have multiple output languages, for inner nodes of
-the graph the input and output languages should match. This is enforced at
-compile-time.</p>
+root node. A tool can have multiple output languages.</p>
 </div>
 <div class="section" id="option-preprocessor">
-<h1><a class="toc-backref" href="#id20">Option preprocessor</a></h1>
+<h1><a class="toc-backref" href="#id17">Option preprocessor</a></h1>
 <p>It is sometimes useful to run error-checking code before processing the
 compilation graph. For example, if optimization options &quot;-O1&quot; and &quot;-O2&quot; are
 implemented as switches, we might want to output a warning if the user invokes
 the driver with both of these options enabled.</p>
-<p>The <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> feature is reserved specially for these
-occasions. Example (adapted from the built-in Base plugin):</p>
+<p>The <tt class="docutils literal">OptionPreprocessor</tt> feature is reserved specially for these
+occasions. Example (adapted from <tt class="docutils literal">llvm/src/Base.td.in</tt>):</p>
 <pre class="literal-block">
 def Preprocess : OptionPreprocessor&lt;
-(case (not (any_switch_on [&quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;, &quot;O3&quot;])),
+(case (not (any_switch_on &quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;, &quot;O3&quot;)),
            (set_option &quot;O2&quot;),
-      (and (switch_on &quot;O3&quot;), (any_switch_on [&quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;])),
-           (unset_option [&quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;]),
-      (and (switch_on &quot;O2&quot;), (any_switch_on [&quot;O0&quot;, &quot;O1&quot;])),
-           (unset_option [&quot;O0&quot;, &quot;O1&quot;]),
+      (and (switch_on &quot;O3&quot;), (any_switch_on &quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;)),
+           (unset_option &quot;O0&quot;, &quot;O1&quot;, &quot;O2&quot;),
+      (and (switch_on &quot;O2&quot;), (any_switch_on &quot;O0&quot;, &quot;O1&quot;)),
+           (unset_option &quot;O0&quot;, &quot;O1&quot;),
       (and (switch_on &quot;O1&quot;), (switch_on &quot;O0&quot;)),
            (unset_option &quot;O0&quot;))
 &gt;;
 </pre>
-<p>Here, <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> is used to unset all spurious <tt class="docutils literal"><span class="pre">-O</span></tt> options so
+<p>Here, <tt class="docutils literal">OptionPreprocessor</tt> is used to unset all spurious <tt class="docutils literal"><span class="pre">-O</span></tt> options so
 that they are not forwarded to the compiler. If no optimization options are
 specified, <tt class="docutils literal"><span class="pre">-O2</span></tt> is enabled.</p>
-<p><tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> is basically a single big <tt class="docutils literal"><span class="pre">case</span></tt> expression, which is
-evaluated only once right after the plugin is loaded. The only allowed actions
-in <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt> are <tt class="docutils literal"><span class="pre">error</span></tt>, <tt class="docutils literal"><span class="pre">warning</span></tt>, and two special actions:
-<tt class="docutils literal"><span class="pre">unset_option</span></tt> and <tt class="docutils literal"><span class="pre">set_option</span></tt>. As their names suggest, they can be used to
-set or unset a given option. To set an option with <tt class="docutils literal"><span class="pre">set_option</span></tt>, use the
-two-argument form: <tt class="docutils literal"><span class="pre">(set_option</span> <span class="pre">&quot;parameter&quot;,</span> <span class="pre">VALUE)</span></tt>. Here, <tt class="docutils literal"><span class="pre">VALUE</span></tt> can be
+<p><tt class="docutils literal">OptionPreprocessor</tt> is basically a single big <tt class="docutils literal">case</tt> expression, which is
+evaluated only once right after the driver is started. The only allowed actions
+in <tt class="docutils literal">OptionPreprocessor</tt> are <tt class="docutils literal">error</tt>, <tt class="docutils literal">warning</tt>, and two special actions:
+<tt class="docutils literal">unset_option</tt> and <tt class="docutils literal">set_option</tt>. As their names suggest, they can be used to
+set or unset a given option. To set an option with <tt class="docutils literal">set_option</tt>, use the
+two-argument form: <tt class="docutils literal">(set_option &quot;parameter&quot;, VALUE)</tt>. Here, <tt class="docutils literal">VALUE</tt> can be
 either a string, a string list, or a boolean constant.</p>
-<p>For convenience, <tt class="docutils literal"><span class="pre">set_option</span></tt> and <tt class="docutils literal"><span class="pre">unset_option</span></tt> also work on lists. That
-is, instead of <tt class="docutils literal"><span class="pre">[(unset_option</span> <span class="pre">&quot;A&quot;),</span> <span class="pre">(unset_option</span> <span class="pre">&quot;B&quot;)]</span></tt> you can use
-<tt class="docutils literal"><span class="pre">(unset_option</span> <span class="pre">[&quot;A&quot;,</span> <span class="pre">&quot;B&quot;])</span></tt>. Obviously, <tt class="docutils literal"><span class="pre">(set_option</span> <span class="pre">[&quot;A&quot;,</span> <span class="pre">&quot;B&quot;])</span></tt> is valid
-only if both <tt class="docutils literal"><span class="pre">A</span></tt> and <tt class="docutils literal"><span class="pre">B</span></tt> are switches.</p>
+<p>For convenience, <tt class="docutils literal">set_option</tt> and <tt class="docutils literal">unset_option</tt> also work with multiple
+arguments. That is, instead of <tt class="docutils literal">[(unset_option <span class="pre">&quot;A&quot;),</span> (unset_option <span class="pre">&quot;B&quot;)]</span></tt> you
+can use <tt class="docutils literal">(unset_option &quot;A&quot;, &quot;B&quot;)</tt>. Obviously, <tt class="docutils literal">(set_option &quot;A&quot;, &quot;B&quot;)</tt> is
+only valid if both <tt class="docutils literal">A</tt> and <tt class="docutils literal">B</tt> are switches.</p>
 </div>
 <div class="section" id="more-advanced-topics">
-<h1><a class="toc-backref" href="#id21">More advanced topics</a></h1>
+<h1><a class="toc-backref" href="#id18">More advanced topics</a></h1>
 <div class="section" id="hooks-and-environment-variables">
-<span id="hooks"></span><h2><a class="toc-backref" href="#id22">Hooks and environment variables</a></h2>
-<p>Normally, LLVMC executes programs from the system <tt class="docutils literal"><span class="pre">PATH</span></tt>. Sometimes,
-this is not sufficient: for example, we may want to specify tool paths
-or names in the configuration file. This can be easily achieved via
-the hooks mechanism. To write your own hooks, just add their
-definitions to the <tt class="docutils literal"><span class="pre">PluginMain.cpp</span></tt> or drop a <tt class="docutils literal"><span class="pre">.cpp</span></tt> file into the
-your plugin directory. Hooks should live in the <tt class="docutils literal"><span class="pre">hooks</span></tt> namespace
-and have the signature <tt class="docutils literal"><span class="pre">std::string</span> <span class="pre">hooks::MyHookName</span> <span class="pre">([const</span> <span class="pre">char*</span>
-<span class="pre">Arg0</span> <span class="pre">[</span> <span class="pre">const</span> <span class="pre">char*</span> <span class="pre">Arg2</span> <span class="pre">[,</span> <span class="pre">...]]])</span></tt>. They can be used from the
-<tt class="docutils literal"><span class="pre">cmd_line</span></tt> tool property:</p>
+<span id="hooks"></span><h2><a class="toc-backref" href="#id19">Hooks and environment variables</a></h2>
+<p>Normally, LLVMC searches for programs in the system <tt class="docutils literal">PATH</tt>. Sometimes, this is
+not sufficient: for example, we may want to specify tool paths or names in the
+configuration file. This can be achieved via the hooks mechanism. To write your
+own hooks, add their definitions to the <tt class="docutils literal">Hooks.cpp</tt> or drop a <tt class="docutils literal">.cpp</tt> file
+into your driver directory. Hooks should live in the <tt class="docutils literal">hooks</tt> namespace and
+have the signature <tt class="docutils literal"><span class="pre">std::string</span> <span class="pre">hooks::MyHookName</span> ([const char* Arg0 [ const
+char* Arg2 [, <span class="pre">...]]])</span></tt>. They can be used from the <tt class="docutils literal">command</tt> tool property:</p>
 <pre class="literal-block">
-(cmd_line &quot;$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)&quot;)
+(command &quot;$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)&quot;)
 </pre>
 <p>To pass arguments to hooks, use the following syntax:</p>
 <pre class="literal-block">
-(cmd_line &quot;$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2&quot;)
+(command &quot;$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2&quot;)
 </pre>
 <p>It is also possible to use environment variables in the same manner:</p>
 <pre class="literal-block">
-(cmd_line &quot;$ENV(VAR1)/path/to/file -o $ENV(VAR2)&quot;)
+(command &quot;$ENV(VAR1)/path/to/file -o $ENV(VAR2)&quot;)
 </pre>
 <p>To change the command line string based on user-provided options use
-the <tt class="docutils literal"><span class="pre">case</span></tt> expression (documented <a class="reference internal" href="#case">above</a>):</p>
+the <tt class="docutils literal">case</tt> expression (documented <a class="reference internal" href="#case">above</a>):</p>
 <pre class="literal-block">
-(cmd_line
+(command
   (case
     (switch_on &quot;E&quot;),
        &quot;llvm-g++ -E -x c $INFILE -o $OUTFILE&quot;,
@@ -680,41 +629,23 @@ the <tt class="docutils literal"><span class="pre">case</span></tt> expression (
        &quot;llvm-g++ -c -x c $INFILE -o $OUTFILE -emit-llvm&quot;))
 </pre>
 </div>
-<div class="section" id="how-plugins-are-loaded">
-<span id="priorities"></span><h2><a class="toc-backref" href="#id23">How plugins are loaded</a></h2>
-<p>It is possible for LLVMC plugins to depend on each other. For example,
-one can create edges between nodes defined in some other plugin. To
-make this work, however, that plugin should be loaded first. To
-achieve this, the concept of plugin priority was introduced. By
-default, every plugin has priority zero; to specify the priority
-explicitly, put the following line in your plugin's TableGen file:</p>
-<pre class="literal-block">
-def Priority : PluginPriority&lt;$PRIORITY_VALUE&gt;;
-# Where PRIORITY_VALUE is some integer &gt; 0
-</pre>
-<p>Plugins are loaded in order of their (increasing) priority, starting
-with 0. Therefore, the plugin with the highest priority value will be
-loaded last.</p>
-</div>
 <div class="section" id="debugging">
-<h2><a class="toc-backref" href="#id24">Debugging</a></h2>
-<p>When writing LLVMC plugins, it can be useful to get a visual view of
-the resulting compilation graph. This can be achieved via the command
-line option <tt class="docutils literal"><span class="pre">--view-graph</span></tt>. This command assumes that <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> and
-<a class="reference external" href="http://pages.cs.wisc.edu/~ghost/">Ghostview</a> are installed. There is also a <tt class="docutils literal"><span class="pre">--write-graph</span></tt> option that
-creates a Graphviz source file (<tt class="docutils literal"><span class="pre">compilation-graph.dot</span></tt>) in the
-current directory.</p>
-<p>Another useful <tt class="docutils literal"><span class="pre">llvmc</span></tt> option is <tt class="docutils literal"><span class="pre">--check-graph</span></tt>. It checks the
-compilation graph for common errors like mismatched output/input
-language names, multiple default edges and cycles. These checks can't
-be performed at compile-time because the plugins can load code
-dynamically. When invoked with <tt class="docutils literal"><span class="pre">--check-graph</span></tt>, <tt class="docutils literal"><span class="pre">llvmc</span></tt> doesn't
-perform any compilation tasks and returns the number of encountered
-errors as its status code.</p>
+<h2><a class="toc-backref" href="#id20">Debugging</a></h2>
+<p>When writing LLVMC-based drivers, it can be useful to get a visual view of the
+resulting compilation graph. This can be achieved via the command line option
+<tt class="docutils literal"><span class="pre">--view-graph</span></tt> (which assumes that <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> and <a class="reference external" href="http://pages.cs.wisc.edu/~ghost/">Ghostview</a> are
+installed). There is also a <tt class="docutils literal"><span class="pre">--write-graph</span></tt> option that creates a Graphviz
+source file (<tt class="docutils literal"><span class="pre">compilation-graph.dot</span></tt>) in the current directory.</p>
+<p>Another useful <tt class="docutils literal">llvmc</tt> option is <tt class="docutils literal"><span class="pre">--check-graph</span></tt>. It checks the compilation
+graph for common errors like mismatched output/input language names, multiple
+default edges and cycles. When invoked with <tt class="docutils literal"><span class="pre">--check-graph</span></tt>, <tt class="docutils literal">llvmc</tt> doesn't
+perform any compilation tasks and returns the number of encountered errors as
+its status code. In the future, these checks will be performed at compile-time
+and this option will disappear.</p>
 </div>
 <div class="section" id="conditioning-on-the-executable-name">
-<h2><a class="toc-backref" href="#id25">Conditioning on the executable name</a></h2>
-<p>For now, the executable name (the value passed to the driver in <tt class="docutils literal"><span class="pre">argv[0]</span></tt>) is
+<h2><a class="toc-backref" href="#id21">Conditioning on the executable name</a></h2>
+<p>For now, the executable name (the value passed to the driver in <tt class="docutils literal">argv[0]</tt>) is
 accessible only in the C++ code (i.e. hooks). Use the following code:</p>
 <pre class="literal-block">
 namespace llvmc {
@@ -734,8 +665,8 @@ if (strcmp(ProgramName, &quot;mydriver&quot;) == 0) {
 </pre>
 <p>In general, you're encouraged not to make the behaviour dependent on the
 executable file name, and use command-line switches instead. See for example how
-the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behaves when it needs to choose the correct linker options
-(think <tt class="docutils literal"><span class="pre">g++</span></tt> vs. <tt class="docutils literal"><span class="pre">gcc</span></tt>).</p>
+the <tt class="docutils literal">llvmc</tt> program behaves when it needs to choose the correct linker options
+(think <tt class="docutils literal">g++</tt> vs. <tt class="docutils literal">gcc</tt>).</p>
 <hr />
 <address>
 <a href="http://jigsaw.w3.org/css-validator/check/referer">
@@ -748,7 +679,7 @@ the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behav
 <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
 <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
 
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2011-04-24 16:17:37 +0200 (Sun, 24 Apr 2011) $
 </address></div>
 </div>
 </div>
diff --git a/docs/CompilerDriverTutorial.html b/docs/CompilerDriverTutorial.html
index 317b1d1..4ed373a 100644
--- a/docs/CompilerDriverTutorial.html
+++ b/docs/CompilerDriverTutorial.html
@@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
+<meta name="generator" content="Docutils 0.6: http://docutils.sourceforge.net/" />
 <title>Tutorial - Using LLVMC</title>
 <link rel="stylesheet" href="llvm.css" type="text/css" />
 </head>
@@ -18,7 +18,7 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <p class="topic-title first">Contents</p>
 <ul class="simple">
 <li><a class="reference internal" href="#introduction" id="id1">Introduction</a></li>
-<li><a class="reference internal" href="#compiling-with-llvmc" id="id2">Compiling with LLVMC</a></li>
+<li><a class="reference internal" href="#using-the-llvmc-program" id="id2">Using the <tt class="docutils literal">llvmc</tt> program</a></li>
 <li><a class="reference internal" href="#using-llvmc-to-generate-toolchain-drivers" id="id3">Using LLVMC to generate toolchain drivers</a></li>
 </ul>
 </div>
@@ -26,51 +26,47 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <p>Written by <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a></p>
 </div><div class="section" id="introduction">
 <h1><a class="toc-backref" href="#id1">Introduction</a></h1>
-<p>LLVMC is a generic compiler driver, which plays the same role for LLVM
-as the <tt class="docutils literal"><span class="pre">gcc</span></tt> program does for GCC - the difference being that LLVMC
-is designed to be more adaptable and easier to customize. Most of
-LLVMC functionality is implemented via plugins, which can be loaded
-dynamically or compiled in. This tutorial describes the basic usage
-and configuration of LLVMC.</p>
+<p>LLVMC is a generic compiler driver, which plays the same role for LLVM as the
+<tt class="docutils literal">gcc</tt> program does for GCC - the difference being that LLVMC is designed to be
+more adaptable and easier to customize. Most of LLVMC functionality is
+implemented via high-level TableGen code, from which a corresponding C++ source
+file is automatically generated. This tutorial describes the basic usage and
+configuration of LLVMC.</p>
 </div>
-<div class="section" id="compiling-with-llvmc">
-<h1><a class="toc-backref" href="#id2">Compiling with LLVMC</a></h1>
-<p>In general, LLVMC tries to be command-line compatible with <tt class="docutils literal"><span class="pre">gcc</span></tt> as
-much as possible, so most of the familiar options work:</p>
+<div class="section" id="using-the-llvmc-program">
+<h1><a class="toc-backref" href="#id2">Using the <tt class="docutils literal">llvmc</tt> program</a></h1>
+<p>In general, <tt class="docutils literal">llvmc</tt> tries to be command-line compatible with <tt class="docutils literal">gcc</tt> as much
+as possible, so most of the familiar options work:</p>
 <pre class="literal-block">
 $ llvmc -O3 -Wall hello.cpp
 $ ./a.out
 hello
 </pre>
-<p>This will invoke <tt class="docutils literal"><span class="pre">llvm-g++</span></tt> under the hood (you can see which
-commands are executed by using the <tt class="docutils literal"><span class="pre">-v</span></tt> option). For further help on
-command-line LLVMC usage, refer to the <tt class="docutils literal"><span class="pre">llvmc</span> <span class="pre">--help</span></tt> output.</p>
+<p>This will invoke <tt class="docutils literal"><span class="pre">llvm-g++</span></tt> under the hood (you can see which commands are
+executed by using the <tt class="docutils literal"><span class="pre">-v</span></tt> option). For further help on command-line LLVMC
+usage, refer to the <tt class="docutils literal">llvmc <span class="pre">--help</span></tt> output.</p>
 </div>
 <div class="section" id="using-llvmc-to-generate-toolchain-drivers">
 <h1><a class="toc-backref" href="#id3">Using LLVMC to generate toolchain drivers</a></h1>
-<p>LLVMC plugins are written mostly using <a class="reference external" href="http://llvm.org/docs/TableGenFundamentals.html">TableGen</a>, so you need to
-be familiar with it to get anything done.</p>
-<p>Start by compiling <tt class="docutils literal"><span class="pre">example/Simple</span></tt>, which is a primitive wrapper for
-<tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
+<p>LLVMC-based drivers are written mostly using <a class="reference external" href="http://llvm.org/docs/TableGenFundamentals.html">TableGen</a>, so you need to be
+familiar with it to get anything done.</p>
+<p>Start by compiling <tt class="docutils literal">example/Simple</tt>, which is a primitive wrapper for
+<tt class="docutils literal">gcc</tt>:</p>
 <pre class="literal-block">
-$ cd $LLVM_DIR/tools/llvmc
-$ cp -r example/Simple plugins/Simple
-
-  # NB: A less verbose way to compile standalone LLVMC-based drivers is
-  # described in the reference manual.
-
-$ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
+$ cd $LLVM_OBJ_DIR/tools/examples/Simple
+$ make
 $ cat &gt; hello.c
-[...]
-$ mygcc hello.c
+#include &lt;stdio.h&gt;
+int main() { printf(&quot;Hello\n&quot;); }
+$ $LLVM_BIN_DIR/Simple -v hello.c
+gcc hello.c -o hello.out
 $ ./hello.out
 Hello
 </pre>
-<p>Here we link our plugin with the LLVMC core statically to form an executable
-file called <tt class="docutils literal"><span class="pre">mygcc</span></tt>. It is also possible to build our plugin as a dynamic
-library to be loaded by the <tt class="docutils literal"><span class="pre">llvmc</span></tt> executable (or any other LLVMC-based
-standalone driver); this is described in the reference manual.</p>
-<p>Contents of the file <tt class="docutils literal"><span class="pre">Simple.td</span></tt> look like this:</p>
+<p>We have thus produced a simple driver called, appropriately, <tt class="docutils literal">Simple</tt>, from
+the input TableGen file <tt class="docutils literal">Simple.td</tt>. The <tt class="docutils literal">llvmc</tt> program itself is generated
+using a similar process (see <tt class="docutils literal">llvmc/src</tt>). Contents of the file <tt class="docutils literal">Simple.td</tt>
+look like this:</p>
 <pre class="literal-block">
 // Include common definitions
 include &quot;llvm/CompilerDriver/Common.td&quot;
@@ -80,33 +76,36 @@ def gcc : Tool&lt;
 [(in_language &quot;c&quot;),
  (out_language &quot;executable&quot;),
  (output_suffix &quot;out&quot;),
- (cmd_line &quot;gcc $INFILE -o $OUTFILE&quot;),
- (sink)
+ (command &quot;gcc&quot;),
+ (sink),
+
+ // -o is what is used by default, out_file_option here is included for
+ // instructive purposes.
+ (out_file_option &quot;-o&quot;)
 ]&gt;;
 
 // Language map
-def LanguageMap : LanguageMap&lt;[LangToSuffixes&lt;&quot;c&quot;, [&quot;c&quot;]&gt;]&gt;;
+def LanguageMap : LanguageMap&lt;[(lang_to_suffixes &quot;c&quot;, &quot;c&quot;)]&gt;;
 
 // Compilation graph
-def CompilationGraph : CompilationGraph&lt;[Edge&lt;&quot;root&quot;, &quot;gcc&quot;&gt;]&gt;;
+def CompilationGraph : CompilationGraph&lt;[(edge &quot;root&quot;, &quot;gcc&quot;)]&gt;;
 </pre>
-<p>As you can see, this file consists of three parts: tool descriptions,
-language map, and the compilation graph definition.</p>
-<p>At the heart of LLVMC is the idea of a compilation graph: vertices in
-this graph are tools, and edges represent a transformation path
-between two tools (for example, assembly source produced by the
-compiler can be transformed into executable code by an assembler). The
-compilation graph is basically a list of edges; a special node named
-<tt class="docutils literal"><span class="pre">root</span></tt> is used to mark graph entry points.</p>
-<p>Tool descriptions are represented as property lists: most properties
-in the example above should be self-explanatory; the <tt class="docutils literal"><span class="pre">sink</span></tt> property
-means that all options lacking an explicit description should be
-forwarded to this tool.</p>
-<p>The <tt class="docutils literal"><span class="pre">LanguageMap</span></tt> associates a language name with a list of suffixes
-and is used for deciding which toolchain corresponds to a given input
-file.</p>
-<p>To learn more about LLVMC customization, refer to the reference
-manual and plugin source code in the <tt class="docutils literal"><span class="pre">plugins</span></tt> directory.</p>
+<p>As you can see, this file consists of three parts: tool descriptions, language
+map, and the compilation graph definition.</p>
+<p>At the heart of LLVMC is the idea of a compilation graph: vertices in this graph
+are tools, and edges represent a transformation path between two tools (for
+example, assembly source produced by the compiler can be transformed into
+executable code by an assembler). The compilation graph is basically a list of
+edges; a special node named <tt class="docutils literal">root</tt> is used to mark graph entry points.</p>
+<p>Tool descriptions are represented as property lists: most properties in the
+example above should be self-explanatory; the <tt class="docutils literal">sink</tt> property means that all
+options lacking an explicit description should be forwarded to this tool.</p>
+<p>The <tt class="docutils literal">LanguageMap</tt> associates a language name with a list of suffixes and is
+used for deciding which toolchain corresponds to a given input file.</p>
+<p>To learn more about writing your own drivers with LLVMC, refer to the reference
+manual and examples in the <tt class="docutils literal">examples</tt> directory. Of a particular interest is
+the <tt class="docutils literal">Skeleton</tt> example, which can serve as a template for your LLVMC-based
+drivers.</p>
 <hr />
 <address>
 <a href="http://jigsaw.w3.org/css-validator/check/referer">
diff --git a/docs/CompilerWriterInfo.html b/docs/CompilerWriterInfo.html
index 6cd9d7d..ed326b3 100644
--- a/docs/CompilerWriterInfo.html
+++ b/docs/CompilerWriterInfo.html
@@ -9,9 +9,9 @@
 
 <body>
 
-<div class="doc_title">
+<h1>
   Architecture/platform information for compiler writers
-</div>
+</h1>
 
 <div class="doc_warning">
   <p>Note: This document is a work-in-progress.  Additions and clarifications
@@ -43,13 +43,15 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="hw">Hardware</a></div>
+<h2><a name="hw">Hardware</a></h2>
 <!-- *********************************************************************** -->
 
+<div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="alpha">Alpha</a></div>
+<h3><a name="alpha">Alpha</a></h3>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a
 href="http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html">Alpha manuals</a> 
@@ -58,9 +60,9 @@ href="http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-libra
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="arm">ARM</a></div>
+<h3><a name="arm">ARM</a></h3>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a href="http://www.arm.com/documentation/">ARM documentation</a> 
 (<a href="http://www.arm.com/documentation/ARMProcessor_Cores/">Processor
@@ -70,9 +72,9 @@ Cores</a>)</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="ia64">Itanium (ia64)</a></div>
+<h3><a name="ia64">Itanium (ia64)</a></h3>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a
 href="http://developer.intel.com/design/itanium2/documentation.htm">Itanium documentation</a> 
@@ -81,9 +83,9 @@ href="http://developer.intel.com/design/itanium2/documentation.htm">Itanium docu
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="mips">MIPS</a></div>
+<h3><a name="mips">MIPS</a></h3>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a
 href="http://mips.com/content/Documentation/MIPSDocumentation/ProcessorArchitecture/doclibrary">MIPS
@@ -92,12 +94,14 @@ Processor Architecture</a></li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="ppc">PowerPC</a></div>
+<h3><a name="ppc">PowerPC</a></h3>
+
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">IBM - Official manuals and docs</div>
+<h4>IBM - Official manuals and docs</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li><a
@@ -129,9 +133,9 @@ PowerPC architecture</a></li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Other documents, collections, notes</div>
+<h4>Other documents, collections, notes</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li><a href="http://penguinppc.org/dev/#library">PowerPC ABI documents</a></li>
@@ -143,10 +147,12 @@ branch stubs for powerpc64-linux (from binutils)</a></li>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="sparc">SPARC</a></div>
+<h3><a name="sparc">SPARC</a></h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li><a href="http://www.sparc.org/resource.htm">SPARC resources</a></li>
@@ -156,12 +162,14 @@ branch stubs for powerpc64-linux (from binutils)</a></li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="x86">X86</a></div>
+<h3><a name="x86">X86</a></h3>
+
+<div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">AMD - Official manuals and docs</div>
+<h4>AMD - Official manuals and docs</h4>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a
 href="http://www.amd.com/us-en/Processors/TechnicalResources/0,,30_182_739,00.html">AMD processor manuals</a></li>
@@ -170,9 +178,9 @@ href="http://www.amd.com/us-en/Processors/TechnicalResources/0,,30_182_739,00.ht
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Intel - Official manuals and docs</div>
+<h4>Intel - Official manuals and docs</h4>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a
 href="http://developer.intel.com/design/pentium4/manuals/index_new.htm">IA-32
@@ -184,19 +192,21 @@ Itanium documentation</a></li>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Other x86-specific information</div>
+<h4>Other x86-specific information</h4>
 
-<div class="doc_text">
+<div>
 <ul>
 <li><a href="http://www.agner.org/assem/calling_conventions.pdf">Calling
 conventions for different C++ compilers and operating systems</a></li>
 </ul>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="other">Other relevant lists</a></div>
+<h3><a name="other">Other relevant lists</a></h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li><a href="http://gcc.gnu.org/readings.html">GCC reading list</a></li>
@@ -204,14 +214,18 @@ conventions for different C++ compilers and operating systems</a></li>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="abi">ABI</a></div>
+<h2><a name="abi">ABI</a></h2>
 <!-- *********************************************************************** -->
 
+<div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="linux">Linux</a></div>
+<h3><a name="linux">Linux</a></h3>
 
-<div class="doc_text">
+<div>
 <ol>
 <li><a href="http://www.linuxbase.org/spec/ELF/ppc64/">PowerPC 64-bit ELF ABI
 Supplement</a></li>
@@ -219,9 +233,9 @@ Supplement</a></li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="osx">OS X</a></div>
+<h3><a name="osx">OS X</a></h3>
 
-<div class="doc_text">
+<div>
 <ol>
 <li><a
 href="http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html">Mach-O
@@ -232,8 +246,10 @@ ABI</a></li>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="misc">Miscellaneous resources</a></div>
+<h2><a name="misc">Miscellaneous resources</a></h2>
 <!-- *********************************************************************** -->
 
 <ul>
@@ -255,8 +271,8 @@ processors.</li>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://misha.brukman.net">Misha Brukman</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html
index a9193f2..fa15633 100644
--- a/docs/DebuggingJITedCode.html
+++ b/docs/DebuggingJITedCode.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">Debugging JITed Code With GDB</div>
+<h1>Debugging JITed Code With GDB</h1>
 <ol>
   <li><a href="#example">Example usage</a></li>
   <li><a href="#background">Background</a></li>
@@ -15,9 +15,9 @@
 <div class="doc_author">Written by Reid Kleckner</div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="example">Example usage</a></div>
+<h2><a name="example">Example usage</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <p>In order to debug code JITed by LLVM, you need GDB 7.0 or newer, which is
 available on most modern distributions of Linux.  The version of GDB that Apple
@@ -96,9 +96,9 @@ function names.
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="background">Background</a></div>
+<h2><a name="background">Background</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <p>Without special runtime support, debugging dynamically generated code with
 GDB (as well as most debuggers) can be quite painful.  Debuggers generally read
@@ -145,8 +145,8 @@ coordinate with GDB to get better debug information.
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="mailto:reid.kleckner@gmail.com">Reid Kleckner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-07 22:16:45 +0200 (Wed, 07 Jul 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index ef99ebc..c121657 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -8,7 +8,7 @@
 </head>
 <body>
       
-<div class="doc_title">LLVM Developer Policy</div>
+<h1>LLVM Developer Policy</h1>
 <ol>
   <li><a href="#introduction">Introduction</a></li>
   <li><a href="#policies">Developer Policies</a>
@@ -34,9 +34,9 @@
 <div class="doc_author">Written by the LLVM Oversight Team</div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 <p>This document contains the LLVM Developer Policy which defines the project's
    policy towards developers and their contributions. The intent of this policy
    is to eliminate miscommunication, rework, and confusion that might arise from
@@ -63,20 +63,19 @@
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="policies">Developer Policies</a></div>
+<h2><a name="policies">Developer Policies</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 <p>This section contains policies that pertain to frequent LLVM developers.  We
    always welcome <a href="#patches">one-off patches</a> from people who do not
    routinely contribute to LLVM, but we expect more from frequent contributors
    to keep the system as efficient as possible for everyone.  Frequent LLVM
    contributors are expected to meet the following requirements in order for
    LLVM to maintain a high standard of quality.<p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="informed">Stay Informed</a> </div>
-<div class="doc_text">
+<h3><a name="informed">Stay Informed</a></h3>
+<div>
 <p>Developers should stay informed by reading at least the "dev" mailing list
    for the projects you are interested in, such as 
    <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a> for
@@ -102,9 +101,9 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="patches">Making a Patch</a></div>
+<h3><a name="patches">Making a Patch</a></h3>
 
-<div class="doc_text">
+<div>
 <p>When making a patch for review, the goal is to make it as easy for the
    reviewer to read it as possible.  As such, we recommend that you:</p>
 
@@ -142,8 +141,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="reviews">Code Reviews</a></div>
-<div class="doc_text">
+<h3><a name="reviews">Code Reviews</a></h3>
+<div>
 <p>LLVM has a code review policy. Code review is one way to increase the quality
    of software. We generally follow these policies:</p>
 
@@ -174,8 +173,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="owners">Code Owners</a></div>
-<div class="doc_text">
+<h3><a name="owners">Code Owners</a></h3>
+<div>
 
 <p>The LLVM Project relies on two features of its process to maintain rapid
    development in addition to the high quality of its source base: the
@@ -225,8 +224,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="testcases">Test Cases</a></div>
-<div class="doc_text">
+<h3><a name="testcases">Test Cases</a></h3>
+<div>
 <p>Developers are required to create test cases for any bugs fixed and any new
    features added.  Some tips for getting your testcase approved:</p>
 
@@ -258,8 +257,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="quality">Quality</a></div>
-<div class="doc_text">
+<h3><a name="quality">Quality</a></h3>
+<div>
 <p>The minimum quality standards that any change must satisfy before being
    committed to the main development branch are:</p>
 
@@ -318,9 +317,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
-  <a name="commitaccess">Obtaining Commit Access</a></div>
-<div class="doc_text">
+<h3><a name="commitaccess">Obtaining Commit Access</a></h3>
+<div>
 
 <p>We grant commit access to contributors with a track record of submitting high
    quality patches.  If you would like commit access, please send an email to
@@ -381,8 +379,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="newwork">Making a Major Change</a></div>
-<div class="doc_text">
+<h3><a name="newwork">Making a Major Change</a></h3>
+<div>
 <p>When a developer begins a major new project with the aim of contributing it
    back to LLVM, s/he should inform the community with an email to
    the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a>
@@ -410,9 +408,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"> <a name="incremental">Incremental Development</a>
-</div>
-<div class="doc_text">
+<h3><a name="incremental">Incremental Development</a></h3>
+<div>
 <p>In the LLVM project, we do all significant changes as a series of incremental
    patches.  We have a strong dislike for huge changes or long-term development
    branches.  Long-term development branches have a number of drawbacks:</p>
@@ -472,9 +469,8 @@
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="attribution">Attribution of 
-Changes</a></div>
-<div class="doc_text">
+<h3><a name="attribution">Attribution of Changes</a></h3>
+<div>
 <p>We believe in correct attribution of contributions to their contributors.
    However, we do not want the source code to be littered with random
    attributions "this code written by J. Random Hacker" (this is noisy and
@@ -486,13 +482,15 @@ Changes</a></div>
 <p>Overall, please do not add contributor names to the source code.</p>
 </div>
 
+</div>
+
 <!--=========================================================================-->
-<div class="doc_section">
+<h2>
   <a name="clp">Copyright, License, and Patents</a>
-</div>
+</h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 <p>This section addresses the issues of copyright, license and patents for the
    LLVM project.  Currently, the University of Illinois is the LLVM copyright
    holder and the terms of its license to LLVM users and developers is the
@@ -504,11 +502,10 @@ Changes</a></div>
    legal matters but does not provide legal advice.  We are not lawyers, please
    seek legal counsel from an attorney.</p>
 </div>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="copyright">Copyright</a></div>
-<div class="doc_text">
+<h3><a name="copyright">Copyright</a></h3>
+<div>
 
 <p>The LLVM project does not require copyright assignments, which means that the
    copyright for the code in the project is held by its respective contributors
@@ -530,8 +527,8 @@ Changes</a></div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="license">License</a></div>
-<div class="doc_text">
+<h3><a name="license">License</a></h3>
+<div>
 <p>We intend to keep LLVM perpetually open source and to use a liberal open
    source license. All of the code in LLVM is available under the
    <a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of
@@ -585,8 +582,8 @@ Changes</a></div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="patents">Patents</a></div>
-<div class="doc_text">
+<h3><a name="patents">Patents</a></h3>
+<div>
 <p>To the best of our knowledge, LLVM does not infringe on any patents (we have
    actually removed code from LLVM in the past that was found to infringe).
    Having code in LLVM that infringes on patents would violate an important goal
@@ -602,6 +599,8 @@ Changes</a></div>
    details.</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -611,8 +610,8 @@ Changes</a></div>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   Written by the 
   <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-11-16 22:32:53 +0100 (Tue, 16 Nov 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index 009dbb5..16820f3 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Exception Handling in LLVM</div>
+<h1>Exception Handling in LLVM</h1>
 
 <table class="layout" style="width:100%">
   <tr class="layout">
@@ -58,10 +58,10 @@
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is the central repository for all information pertaining to
    exception handling in LLVM.  It describes the format that LLVM exception
@@ -70,14 +70,12 @@
    provides specific examples of what exception handling information is used for
    in C/C++.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="itanium">Itanium ABI Zero-cost Exception Handling</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Exception handling for most programming languages is designed to recover from
    conditions that rarely occur during general use of an application.  To that
@@ -106,11 +104,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="sjlj">Setjmp/Longjmp Exception Handling</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics
    <a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a> and
@@ -138,11 +136,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="overview">Overview</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>When an exception is thrown in LLVM code, the runtime does its best to find a
    handler suited to processing the circumstance.</p>
@@ -185,12 +183,14 @@
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_section">
+<h2>
   <a name="codegen">LLVM Code Generation</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
 
 <p>At the time of this writing, only C++ exception handling support is available
    in LLVM.  So the remainder of this document will be somewhat C++-centric.</p>
@@ -200,14 +200,12 @@
    we will describe the implementation of LLVM exception handling in terms of
    C++ examples.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="throw">Throw</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Languages that support exception handling typically provide a <tt>throw</tt>
    operation to initiate the exception process.  Internally, a throw operation
@@ -225,11 +223,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="try_catch">Try/Catch</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>A call within the scope of a <i>try</i> statement can potentially raise an
    exception.  In those circumstances, the LLVM C++ front-end replaces the call
@@ -313,11 +311,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="cleanups">Cleanups</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>To handle destructors and cleanups in <tt>try</tt> code, control may not run
    directly from a landing pad to the first catch.  Control may actually flow
@@ -332,11 +330,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="throw_filters">Throw Filters</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>C++ allows the specification of which exception types can be thrown from a
    function.  To represent this a top level landing pad may exist to filter out
@@ -359,11 +357,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="restrictions">Restrictions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The semantics of the invoke instruction require that any exception that
    unwinds through an invoke call should result in a branch to the invoke's
@@ -384,25 +382,25 @@
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_section">
+<h2>
   <a name="format_common_intrinsics">Exception Handling Intrinsics</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM uses several intrinsic functions (name prefixed with "llvm.eh") to
    provide exception handling information at various points in generated
    code.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_exception">llvm.eh.exception</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   i8* %<a href="#llvm_eh_exception">llvm.eh.exception</a>()
@@ -413,11 +411,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_selector">llvm.eh.selector</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   i32 %<a href="#llvm_eh_selector">llvm.eh.selector</a>(i8*, i8*, ...)
@@ -445,11 +443,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_typeid_for">llvm.eh.typeid.for</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   i32 %<a href="#llvm_eh_typeid_for">llvm.eh.typeid.for</a>(i8*)
@@ -463,11 +461,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_sjlj_setjmp">llvm.eh.sjlj.setjmp</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   i32 %<a href="#llvm_eh_sjlj_setjmp">llvm.eh.sjlj.setjmp</a>(i8*)
@@ -492,11 +490,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_sjlj_longjmp">llvm.eh.sjlj.longjmp</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   void %<a href="#llvm_eh_sjlj_longjmp">llvm.eh.sjlj.setjmp</a>(i8*)
@@ -507,16 +505,16 @@
    style exception handling. The single parameter is a pointer to a
    buffer populated by <a href="#llvm_eh_sjlj_setjmp">
      <tt>llvm.eh.sjlj.setjmp</tt></a>. The frame pointer and stack pointer
-   are restored from the buffer, then control is transfered to the
+   are restored from the buffer, then control is transferred to the
    destination address.</p>
 
 </div>
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   i8* %<a href="#llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a>()
@@ -531,11 +529,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_sjlj_callsite">llvm.eh.sjlj.callsite</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   void %<a href="#llvm_eh_sjlj_callsite">llvm.eh.sjlj.callsite</a>(i32)
@@ -549,11 +547,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <pre>
   void %<a href="#llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>(i32)
@@ -565,24 +563,24 @@
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_section">
+<h2>
   <a name="asm">Asm Table Formats</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
 
 <p>There are two tables that are used by the exception handling runtime to
    determine which actions should take place when an exception is thrown.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="unwind_tables">Exception Handling Frame</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>An exception handling frame <tt>eh_frame</tt> is very similar to the unwind
    frame used by dwarf debug info.  The frame contains all the information
@@ -596,11 +594,11 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="exception_tables">Exception Tables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>An exception table contains information about what actions to take when an
    exception is thrown in a particular part of a function's code.  There is one
@@ -611,12 +609,14 @@
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_section">
+<h2>
   <a name="todo">ToDo</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
 
 <ol>
 
@@ -636,8 +636,8 @@
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-12-10 00:05:48 +0100 (Fri, 10 Dec 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html
index 2237560..03cfa7e 100644
--- a/docs/ExtendingLLVM.html
+++ b/docs/ExtendingLLVM.html
@@ -8,9 +8,9 @@
 
 <body>
 
-<div class="doc_title">
+<h1>
   Extending LLVM: Adding instructions, intrinsics, types, etc.
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction and Warning</a></li>
@@ -31,12 +31,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction and Warning</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>During the course of using LLVM, you may wish to customize it for your
 research project or for experimentation. At this point, you may realize that
@@ -68,12 +68,12 @@ effort by doing so.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="intrinsic">Adding a new intrinsic function</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Adding a new intrinsic function to LLVM is much easier than adding a new
 instruction.  Almost all extensions to LLVM should start as an intrinsic
@@ -130,12 +130,12 @@ support for it.  Generally you must do the following steps:</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="sdnode">Adding a new SelectionDAG node</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>As with intrinsics, adding a new SelectionDAG node to LLVM is much easier
 than adding a new instruction.  New nodes are often added to help represent
@@ -220,12 +220,12 @@ complicated behavior in a single node (rotate).</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="instruction">Adding a new instruction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><span class="doc_warning">WARNING: adding instructions changes the bitcode
 format, and it will take some effort to maintain compatibility with
@@ -277,25 +277,23 @@ to understand this new instruction.</p>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="type">Adding a new type</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><span class="doc_warning">WARNING: adding new types changes the bitcode
 format, and will break compatibility with currently-existing LLVM
 installations.</span> Only add new types if it is absolutely necessary.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="fund_type">Adding a fundamental type</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ol>
 
@@ -317,11 +315,11 @@ installations.</span> Only add new types if it is absolutely necessary.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="derived_type">Adding a derived type</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ol>
 <li><tt>llvm/include/llvm/Type.h</tt>:
@@ -373,6 +371,8 @@ void calcTypeName(const Type *Ty,
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -382,9 +382,9 @@ void calcTypeName(const Type *Ty,
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/FAQ.html b/docs/FAQ.html
index 9415a90..20ba1d5 100644
--- a/docs/FAQ.html
+++ b/docs/FAQ.html
@@ -12,9 +12,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   LLVM: Frequently Asked Questions
-</div>
+</h1>
 
 <ol>
   <li><a href="#license">License</a>
@@ -133,14 +133,14 @@
 </ol>
 
 <div class="doc_author">
-  <p>Written by <a href="http://llvm.org">The LLVM Team</a></p>
+  <p>Written by <a href="http://llvm.org/">The LLVM Team</a></p>
 </div>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="license">License</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
 <div class="question">
@@ -189,9 +189,9 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="source">Source Code</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
 <div class="question">
@@ -227,9 +227,9 @@ LLVM have been ported to a plethora of platforms.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="build">Build Problems</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
 <div class="question">
@@ -449,7 +449,9 @@ Stop.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="felangs">Source Languages</a></div>
+<h2>
+  <a name="felangs">Source Languages</a>
+</h2>
 
 <div class="question">
 <p><a name="langs">What source languages are supported?</a></p>
@@ -555,9 +557,9 @@ Stop.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="cfe">Using the GCC Front End</a>
-</div>
+</h2>
 
 <div class="question">
 <p>When I compile software that uses a configure script, the configure script
@@ -712,9 +714,9 @@ Stop.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="cfe_code">Questions about code generated by the GCC front-end</a>
-</div>
+</h2>
 
 <div class="question">
 <p><a name="iosinit">What is this <tt>llvm.global_ctors</tt> and
@@ -930,8 +932,8 @@ F.i:
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-19 01:59:50 +0200 (Tue, 19 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
index 0b2827c..f502481 100644
--- a/docs/GCCFEBuildInstrs.html
+++ b/docs/GCCFEBuildInstrs.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   Building the LLVM GCC Front-End
-</div>
+</h1>
 
 <ol>
   <li><a href="#instructions">Building llvm-gcc from Source</a></li>
@@ -24,10 +24,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<h1><a name="instructions">Building llvm-gcc from Source</a></h1>
+<h2><a name="instructions">Building llvm-gcc from Source</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section describes how to acquire and build llvm-gcc 4.2, which is based
 on the GCC 4.2.1 front-end.  Supported languages are Ada, C, C++, Fortran,
@@ -67,10 +67,10 @@ svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk <i>dst-directory</i>
 </div>
 
 <!-- *********************************************************************** -->
-<h1><a name="ada">Building the Ada front-end</a></h1>
+<h2><a name="ada">Building the Ada front-end</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>Building with support for Ada amounts to following the directions in the
 top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
 <tt>EXTRALANGS=,ada</tt></p>
@@ -100,7 +100,7 @@ top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
   <li><p>Because the Ada front-end is experimental, it is wise to build the
       compiler with checking enabled.  This causes it to run much slower, but
       helps catch mistakes in the compiler (please report any problems using
-      <a href="http://llvm.org/bugs">LLVM bugzilla</a>).</p></li>
+      <a href="http://llvm.org/bugs/">LLVM bugzilla</a>).</p></li>
   <li><p>The Ada front-end <a href="http://llvm.org/PR2007">fails to
       bootstrap</a>, due to lack of LLVM support for
       <tt>setjmp</tt>/<tt>longjmp</tt> style exception handling (used
@@ -233,10 +233,10 @@ make install
 </div>
 
 <!-- *********************************************************************** -->
-<h1><a name="fortran">Building the Fortran front-end</a></h1>
+<h2><a name="fortran">Building the Fortran front-end</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>To build with support for Fortran, follow the directions in the top-level
 <tt>README.LLVM</tt> file, adding ",fortran" to EXTRALANGS, for example:</p>
 
@@ -247,10 +247,10 @@ EXTRALANGS=,fortran
 </div>
 
 <!-- *********************************************************************** -->
-<h1><a name="license">License Information</a></h1>
+<h2><a name="license">License Information</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
 The LLVM GCC frontend is licensed to you under the GNU General Public License
 and the GNU Lesser General Public License.  Please see the files COPYING and
@@ -271,8 +271,8 @@ More information is <a href="FAQ.html#license">available in the FAQ</a>.
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-31 21:40:21 +0200 (Tue, 31 Aug 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
index fde070c..13a3714 100644
--- a/docs/GarbageCollection.html
+++ b/docs/GarbageCollection.html
@@ -13,9 +13,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   Accurate Garbage Collection with LLVM
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a>
@@ -79,12 +79,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Garbage collection is a widely used technique that frees the programmer from
 having to know the lifetimes of heap objects, making software easier to produce
@@ -124,14 +124,12 @@ techniques dominates any low-level losses.</p>
 <p>This document describes the mechanisms and interfaces provided by LLVM to
 support accurate garbage collection.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="feature">Goals and non-goals</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM's intermediate representation provides <a href="#intrinsics">garbage
 collection intrinsics</a> that offer support for a broad class of
@@ -151,14 +149,14 @@ collector models. For instance, the intrinsics permit:</p>
 support a broad class of garbage collected languages including Scheme, ML, Java,
 C#, Perl, Python, Lua, Ruby, other scripting languages, and more.</p>
 
-<p>However, LLVM does not itself provide a garbage collector&#151;this should
+<p>However, LLVM does not itself provide a garbage collector&mdash;this should
 be part of your language's runtime library. LLVM provides a framework for
 compile time <a href="#plugin">code generation plugins</a>. The role of these
 plugins is to generate code and data structures which conforms to the <em>binary
 interface</em> specified by the <em>runtime library</em>. This is similar to the
 relationship between LLVM and DWARF debugging info, for example. The
 difference primarily lies in the lack of an established standard in the domain
-of garbage collection&#151;thus the plugins.</p>
+of garbage collection&mdash;thus the plugins.</p>
 
 <p>The aspects of the binary interface with which LLVM's GC support is
 concerned are:</p>
@@ -198,13 +196,15 @@ compiler matures.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="quickstart">Getting started</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Using a GC with LLVM implies many things, for example:</p>
 
@@ -246,14 +246,12 @@ compiler matures.</p>
 includes a highly portable, built-in ShadowStack code generator. It is compiled
 into <tt>llc</tt> and works even with the interpreter and C backends.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="quickstart-compiler">In your compiler</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>To turn the shadow stack on for your functions, first call:</p>
 
@@ -276,11 +274,11 @@ switching to a more advanced GC.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="quickstart-runtime">In your runtime</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The shadow stack doesn't imply a memory allocation algorithm. A semispace
 collector or building atop <tt>malloc</tt> are great places to start, and can
@@ -343,11 +341,11 @@ void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
 }</pre></div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="shadow-stack">About the shadow stack</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Unlike many GC algorithms which rely on a cooperative code generator to
 compile stack maps, this algorithm carefully maintains a linked list of stack
@@ -372,13 +370,15 @@ in order to improve performance.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="core">IR features</a><a name="intrinsics"></a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section describes the garbage collection facilities provided by the
 <a href="LangRef.html">LLVM intermediate representation</a>. The exact behavior
@@ -390,18 +390,16 @@ intended to be a complete interface to any garbage collector. A program will
 need to interface with the GC library using the facilities provided by that
 program.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="gcattr">Specifying GC code generation: <tt>gc "..."</tt></a>
-</div>
+</h3>
 
 <div class="doc_code"><tt>
   define <i>ty</i> @<i>name</i>(...) <span style="text-decoration: underline">gc "<i>name</i>"</span> { ...
 </tt></div>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>gc</tt> function attribute is used to specify the desired GC style
 to the compiler. Its programmatic equivalent is the <tt>setGC</tt> method of
@@ -418,15 +416,15 @@ programs that use different garbage collection algorithms (or none at all).</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="gcroot">Identifying GC roots on the stack: <tt>llvm.gcroot</tt></a>
-</div>
+</h3>
 
 <div class="doc_code"><tt>
   void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
 </tt></div>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>llvm.gcroot</tt> intrinsic is used to inform LLVM that a stack
 variable references an object on the heap and is to be tracked for garbage
@@ -494,11 +492,11 @@ CodeBlock:
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="barriers">Reading and writing references in the heap</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Some collectors need to be informed when the mutator (the program that needs
 garbage collection) either reads a pointer from or writes a pointer to a field
@@ -534,18 +532,16 @@ require the corresponding barrier. Such a GC plugin will replace the intrinsic
 calls with the corresponding <tt>load</tt> or <tt>store</tt> instruction if they
 are used.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="gcwrite">Write barrier: <tt>llvm.gcwrite</tt></a>
-</div>
+</h4>
 
 <div class="doc_code"><tt>
 void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
 </tt></div>
 
-<div class="doc_text">
+<div>
 
 <p>For write barriers, LLVM provides the <tt>llvm.gcwrite</tt> intrinsic
 function. It has exactly the same semantics as a non-volatile <tt>store</tt> to
@@ -559,15 +555,15 @@ implement reference counting.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="gcread">Read barrier: <tt>llvm.gcread</tt></a>
-</div>
+</h4>
 
 <div class="doc_code"><tt>
 i8* @llvm.gcread(i8* %object, i8** %derived)<br>
 </tt></div>
 
-<div class="doc_text">
+<div>
 
 <p>For read barriers, LLVM provides the <tt>llvm.gcread</tt> intrinsic function.
 It has exactly the same semantics as a non-volatile <tt>load</tt> from the
@@ -580,13 +576,17 @@ writes.</p>
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="plugin">Implementing a collector plugin</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>User code specifies which GC code generation to use with the <tt>gc</tt>
 function attribute or, equivalently, with the <tt>setGC</tt> method of
@@ -666,14 +666,12 @@ $ llvm-as &lt; sample.ll | llc -load=MyGC.so</pre></blockquote>
 <p>It is also possible to statically link the collector plugin into tools, such
 as a language-specific compiler front-end.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="collector-algos">Overview of available features</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>GCStrategy</tt> provides a range of features through which a plugin
 may do useful work. Some of these are callbacks, some are algorithms that can
@@ -958,11 +956,11 @@ interest.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="stack-map">Computing stack maps</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM automatically computes a stack map. One of the most important features
 of a <tt>GCStrategy</tt> is to compile this information into the executable in
@@ -1014,11 +1012,11 @@ for collector plugins which implement reference counting or a shadow stack.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="init-roots">Initializing roots to null: <tt>InitRoots</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <blockquote><pre
 >MyGC::MyGC() {
@@ -1039,12 +1037,12 @@ this feature should be used by all GC plugins. It is enabled by default.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="custom">Custom lowering of intrinsics: <tt>CustomRoots</tt>, 
     <tt>CustomReadBarriers</tt>, and <tt>CustomWriteBarriers</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>For GCs which use barriers or unusual treatment of stack roots, these
 flags allow the collector to perform arbitrary transformations of the LLVM
@@ -1129,11 +1127,11 @@ bool MyGC::performCustomLowering(Function &amp;F) {
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="safe-points">Generating safe points: <tt>NeededSafePoints</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM can compute four kinds of safe points:</p>
 
@@ -1193,11 +1191,11 @@ safe point (because only the topmost function has been patched).</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="assembly">Emitting assembly code: <tt>GCMetadataPrinter</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM allows a plugin to print arbitrary assembly code before and after the
 rest of a module's assembly code. At the end of the module, the GC can compile
@@ -1341,14 +1339,15 @@ void MyGCPrinter::finishAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="references">References</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><a name="appel89">[Appel89]</a> Runtime Tags Aren't Necessary. Andrew
 W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.</p>
@@ -1379,8 +1378,8 @@ Fergus Henderson. International Symposium on Memory Management 2002.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-11 22:16:09 +0200 (Tue, 11 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index 5410137..4c347a6 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -11,9 +11,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   The Often Misunderstood GEP Instruction
-</div>
+</h1>
 
 <ol>
   <li><a href="#intro">Introduction</a></li>
@@ -58,10 +58,10 @@
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro"><b>Introduction</b></a></div>
+<h2><a name="intro">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text"> 
+<div>
   <p>This document seeks to dispel the mystery and confusion surrounding LLVM's
   <a href="LangRef.html#i_getelementptr">GetElementPtr</a> (GEP) instruction.
   Questions about the wily GEP instruction are
@@ -72,21 +72,20 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="addresses"><b>Address Computation</b></a></div>
+<h2><a name="addresses">Address Computation</a></h2>
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
   <p>When people are first confronted with the GEP instruction, they tend to
   relate it to known concepts from other programming paradigms, most notably C
   array indexing and field selection. GEP closely resembles C array indexing
   and field selection, however it's is a little different and this leads to
   the following questions.</p>
-</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="firstptr"><b>What is the first index of the GEP instruction?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="firstptr">What is the first index of the GEP instruction?</a>
+</h3>
+<div>
   <p>Quick answer: The index stepping through the first operand.</p> 
   <p>The confusion with the first index usually arises from thinking about 
   the GetElementPtr instruction as if it was a C index operator. They aren't the
@@ -205,11 +204,11 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="extra_index"><b>Why is the extra 0 index required?</b></a>
-</div>
+<h3>
+  <a name="extra_index">Why is the extra 0 index required?</a>
+</h3>
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
   <p>Quick answer: there are no superfluous indices.</p>
   <p>This question arises most often when the GEP instruction is applied to a
   global variable which is always a pointer type. For example, consider
@@ -247,10 +246,10 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="deref"><b>What is dereferenced by GEP?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="deref">What is dereferenced by GEP?</a>
+</h3>
+<div>
   <p>Quick answer: nothing.</p> 
   <p>The GetElementPtr instruction dereferences nothing. That is, it doesn't
   access memory in any way. That's what the Load and Store instructions are for.
@@ -302,10 +301,10 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="lead0"><b>Why don't GEP x,0,0,1 and GEP x,1 alias?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="lead0">Why don't GEP x,0,0,1 and GEP x,1 alias?</a>
+</h3>
+<div>
   <p>Quick Answer: They compute different address locations.</p>
   <p>If you look at the first indices in these GEP
   instructions you find that they are different (0 and 1), therefore the address
@@ -331,10 +330,10 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="trail0"><b>Why do GEP x,1,0,0 and GEP x,1 alias?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="trail0">Why do GEP x,1,0,0 and GEP x,1 alias?</a>
+</h3>
+<div>
   <p>Quick Answer: They compute the same address location.</p>
   <p>These two GEP instructions will compute the same address because indexing
   through the 0th element does not change the address. However, it does change
@@ -355,10 +354,10 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="vectors"><b>Can GEP index into vector elements?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="vectors">Can GEP index into vector elements?</a>
+</h3>
+<div>
   <p>This hasn't always been forcefully disallowed, though it's not recommended.
      It leads to awkward special cases in the optimizers, and fundamental
      inconsistency in the IR. In the future, it will probably be outright
@@ -368,10 +367,10 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="addrspace"><b>What effect do address spaces have on GEPs?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="addrspace">What effect do address spaces have on GEPs?</a>
+</h3>
+<div>
    <p>None, except that the address space qualifier on the first operand pointer
       type always matches the address space qualifier on the result type.</p>
 
@@ -379,11 +378,12 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="int"><b>How is GEP different from ptrtoint, arithmetic,
-                   and inttoptr?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="int">
+    How is GEP different from ptrtoint, arithmetic, and inttoptr?
+  </a>
+</h3>
+<div>
   <p>It's very similar; there are only subtle differences.</p>
 
   <p>With ptrtoint, you have to pick an integer type. One approach is to pick i64;
@@ -409,11 +409,13 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="be"><b>I'm writing a backend for a target which needs custom
-                  lowering for GEP. How do I do this?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="be">
+    I'm writing a backend for a target which needs custom lowering for GEP.
+    How do I do this?
+  </a>
+</h3>
+<div>
   <p>You don't. The integer computation implied by a GEP is target-independent.
      Typically what you'll need to do is make your backend pattern-match
      expressions trees involving ADD, MUL, etc., which are what GEP is lowered
@@ -431,10 +433,10 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="vla"><b>How does VLA addressing work with GEPs?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="vla">How does VLA addressing work with GEPs?</a>
+</h3>
+<div>
   <p>GEPs don't natively support VLAs. LLVM's type system is entirely static,
      and GEP address computations are guided by an LLVM type.</p>
 
@@ -450,16 +452,18 @@ idx3 = (char*) &amp;MyVar + 8
      VLA and non-VLA indexing in the same manner.</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="rules"><b>Rules</b></a></div>
+<h2><a name="rules">Rules</a></h2>
 <!-- *********************************************************************** -->
-
+<div>
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="bounds"><b>What happens if an array index is out of bounds?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="bounds">What happens if an array index is out of bounds?</a>
+</h3>
+<div>
   <p>There are two senses in which an array index can be out of bounds.</p>
 
   <p>First, there's the array type which comes from the (static) type of
@@ -498,20 +502,20 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="negative"><b>Can array indices be negative?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="negative">Can array indices be negative?</a>
+</h3>
+<div>
   <p>Yes. This is basically a special case of array indices being out
      of bounds.</p>
 
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="compare"><b>Can I compare two values computed with GEPs?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="compare">Can I compare two values computed with GEPs?</a>
+</h3>
+<div>
   <p>Yes. If both addresses are within the same allocated object, or 
      one-past-the-end, you'll get the comparison result you expect. If either
      is outside of it, integer arithmetic wrapping may occur, so the
@@ -520,11 +524,13 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="types"><b>Can I do GEP with a different pointer type than the type of
-                     the underlying object?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="types">
+    Can I do GEP with a different pointer type than the type of
+    the underlying object?
+  </a>
+</h3>
+<div>
   <p>Yes. There are no restrictions on bitcasting a pointer value to an arbitrary
      pointer type. The types in a GEP serve only to define the parameters for the
      underlying integer computation. They need not correspond with the actual
@@ -538,11 +544,12 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="null"><b>Can I cast an object's address to integer and add it
-                    to null?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="null">
+    Can I cast an object's address to integer and add it to null?
+  </a>
+</h3>
+<div>
   <p>You can compute an address that way, but if you use GEP to do the add,
      you can't use that pointer to actually access the object, unless the
      object is managed outside of LLVM.</p>
@@ -562,11 +569,13 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="ptrdiff"><b>Can I compute the distance between two objects, and add
-                       that value to one address to compute the other address?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="ptrdiff">
+    Can I compute the distance between two objects, and add
+    that value to one address to compute the other address?
+  </a>
+</h3>
+<div>
   <p>As with arithmetic on null, You can use GEP to compute an address that
      way, but you can't use that pointer to actually access the object if you
      do, unless the object is managed outside of LLVM.</p>
@@ -577,10 +586,10 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="tbaa"><b>Can I do type-based alias analysis on LLVM IR?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="tbaa">Can I do type-based alias analysis on LLVM IR?</a>
+</h3>
+<div>
   <p>You can't do type-based alias analysis using LLVM's built-in type system,
      because LLVM has no restrictions on mixing types in addressing, loads or
      stores.</p>
@@ -594,10 +603,10 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="overflow"><b>What happens if a GEP computation overflows?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="overflow">What happens if a GEP computation overflows?</a>
+</h3>
+<div>
    <p>If the GEP lacks the <tt>inbounds</tt> keyword, the value is the result
       from evaluating the implied two's complement integer computation. However,
       since there's no guarantee of where an object will be allocated in the
@@ -624,11 +633,12 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="check"><b>How can I tell if my front-end is following the
-                     rules?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="check">
+    How can I tell if my front-end is following the rules?
+  </a>
+</h3>
+<div>
    <p>There is currently no checker for the getelementptr rules. Currently,
       the only way to do this is to manually check each place in your front-end
       where GetElementPtr operators are created.</p>
@@ -641,16 +651,18 @@ idx3 = (char*) &amp;MyVar + 8
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="rationale"><b>Rationale</b></a></div>
+<h2><a name="rationale">Rationale</a></h2>
 <!-- *********************************************************************** -->
-
+<div>
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="goals"><b>Why is GEP designed this way?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="goals">Why is GEP designed this way?</a>
+</h3>
+<div>
    <p>The design of GEP has the following goals, in rough unofficial
       order of priority:</p>
    <ul>
@@ -669,10 +681,10 @@ idx3 = (char*) &amp;MyVar + 8
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_subsection">
-  <a name="i32"><b>Why do struct member indices always use i32?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="i32">Why do struct member indices always use i32?</a>
+</h3>
+<div>
   <p>The specific type i32 is probably just a historical artifact, however it's
      wide enough for all practical purposes, so there's been no need to change it.
      It doesn't necessarily imply i32 address arithmetic; it's just an identifier
@@ -684,10 +696,10 @@ idx3 = (char*) &amp;MyVar + 8
 
 <!-- *********************************************************************** -->
 
-<div class="doc_subsection">
-  <a name="uglygep"><b>What's an uglygep?</b></a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="uglygep">What's an uglygep?</a>
+</h3>
+<div>
   <p>Some LLVM optimizers operate on GEPs by internally lowering them into
      more primitive integer expressions, which allows them to be combined
      with other integer expressions and/or split into multiple separate
@@ -704,11 +716,13 @@ idx3 = (char*) &amp;MyVar + 8
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="summary"><b>Summary</b></a></div>
+<h2><a name="summary">Summary</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
   <p>In summary, here's some things to always remember about the GetElementPtr
   instruction:</p>
   <ol>
@@ -732,8 +746,8 @@ idx3 = (char*) &amp;MyVar + 8
   src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
-  Last modified: $Date: 2011-02-11 22:50:52 +0100 (Fri, 11 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br/>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index dfb976a..cc5c59e 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   Getting Started with the LLVM System  
-</div>
+</h1>
 
 <ul>
   <li><a href="#overview">Overview</a>
@@ -62,7 +62,7 @@
   <p>Written by: 
     <a href="mailto:criswell@uiuc.edu">John Criswell</a>, 
     <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
-    <a href="http://misha.brukman.net">Misha Brukman</a>, 
+    <a href="http://misha.brukman.net/">Misha Brukman</a>, 
     <a href="http://www.cs.uiuc.edu/~vadve">Vikram Adve</a>, and
     <a href="mailto:gshi1@uiuc.edu">Guochun Shi</a>.
   </p>
@@ -70,12 +70,12 @@
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="overview"><b>Overview</b></a>
-</div>
+<h2>
+  <a name="overview">Overview</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to LLVM! In order to get started, you first need to know some
 basic information.</p>
@@ -102,12 +102,12 @@ and performance.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="quickstart"><b>Getting Started Quickly (A Summary)</b></a>
-</div>
+<h2>
+  <a name="quickstart">Getting Started Quickly (A Summary)</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Here's the short story for getting up and running quickly with LLVM:</p>
 
@@ -116,13 +116,13 @@ and performance.
   <li>Read the documentation.</li>
   <li>Remember that you were warned twice about reading the documentation.</li>
   <li>Install the llvm-gcc-4.2 front end if you intend to compile C or C++
-      (see <a href="#installcf">Install the GCC Front End</a> for details):</li>
+      (see <a href="#installcf">Install the GCC Front End</a> for details):
     <ol>
       <li><tt>cd <i>where-you-want-the-C-front-end-to-live</i></tt></li>
       <li><tt>gunzip --stdout llvm-gcc-4.2-<i>version</i>-<i>platform</i>.tar.gz | tar -xvf -</tt></li>
 	  <li><tt><i>install-binutils-binary-from-MinGW</i></tt> (Windows only)</li>
 	  <li>Note: If the binary extension is "<tt>.bz</tt>" use <tt>bunzip2</tt> instead of <tt>gunzip</tt>.</li>
-	  <li>Note: On Windows, use <a href="http://www.7-zip.org">7-Zip</a> or a similar archiving tool.</li>
+	  <li>Note: On Windows, use <a href="http://www.7-zip.org/">7-Zip</a> or a similar archiving tool.</li>
 	  <li>Add <tt>llvm-gcc</tt>'s "<tt>bin</tt>" directory to your <tt>PATH</tt> environment variable.</li>
     </ol></li>
 
@@ -191,25 +191,23 @@ Layout</a> to learn about the layout of the source code tree.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="requirements"><b>Requirements</b></a>
-</div>
+<h2>
+  <a name="requirements">Requirements</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Before you begin to use the LLVM system, review the requirements given below.
 This may save you some trouble by knowing ahead of time what hardware and
 software you will need.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
-  <a name="hardware"><b>Hardware</b></a>
-</div>
+<h3>
+  <a name="hardware">Hardware</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM is known to work on the following platforms:</p>
 
@@ -268,7 +266,8 @@ software you will need.</p>
 <tr>
   <td>MinGW/Win32</td>
   <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_6">6</a>,
-     <a href="#pf_8">8</a>, <a href="#pf_10">10</a></sup></td>
+     <a href="#pf_8">8</a>, <a href="#pf_10">10</a>,
+     <a href="#pf_11">11</a></sup></td>
   <td>GCC 3.4.X, binutils 2.20</td>
 </tr>
 </table>
@@ -311,6 +310,11 @@ software you will need.</p>
   <td>Itanium (IA-64)</td>
   <td>HP aCC</td>
 </tr>
+<tr>
+  <td>Windows x64</td>
+  <td>x86-64</td>
+  <td>mingw-w64's GCC-4.5.x<sup><a href="#pf_12">12</a></sup></td>
+</tr>
 </table>
 
 <p><b>Notes:</b></p>
@@ -337,9 +341,10 @@ up</a></li>
     before any Windows-based versions such as Strawberry Perl and
     ActivePerl, as these have Windows-specifics that will cause the
     build to fail.</a></li>
-<li><a name="pf_11">In general, LLVM modules requiring dynamic linking can
-    not be built on Windows. However, you can build LLVM tools using
-    <i>"make tools-only"</i>.</li>
+<li><a name="pf_11">To use LLVM modules on Win32-based system,
+    you may configure LLVM with <i>&quot;--enable-shared&quot;</i>.</a></li>
+<li><a name="pf_12">To compile SPU backend, you need to add
+    <tt>&quot;LDFLAGS=-Wl,--stack,16777216&quot;</tt> to configure.</a></li>
 </ol>
 </div>
 
@@ -363,8 +368,10 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="software"><b>Software</b></a></div>
-<div class="doc_text">
+<h3>
+  <a name="software">Software</a>
+</h3>
+<div>
   <p>Compiling LLVM requires that you have several software packages 
   installed. The table below lists those required packages. The Package column
   is the usual name for the software package that LLVM depends on. The Version
@@ -380,13 +387,13 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
     </tr>
 
     <tr>
-      <td><a href="http://gcc.gnu.org">GCC</a></td>
+      <td><a href="http://gcc.gnu.org/">GCC</a></td>
       <td>3.4.2</td>
       <td>C/C++ compiler<sup><a href="#sf1">1</a></sup></td>
     </tr>
 
     <tr>
-      <td><a href="http://www.gnu.org/software/texinfo">TeXinfo</a></td>
+      <td><a href="http://www.gnu.org/software/texinfo/">TeXinfo</a></td>
       <td>4.5</td>
       <td>For building the CFE</td>
     </tr>
@@ -397,6 +404,11 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
       <td>Subversion access to LLVM<sup><a href="#sf2">2</a></sup></td>
     </tr>
 
+    <!-- FIXME:
+    Do we support dg?
+    Are DejaGnu and expect obsolete?
+    Shall we mention Python? -->
+
     <tr>
       <td><a href="http://savannah.gnu.org/projects/dejagnu">DejaGnu</a></td>
       <td>1.4.2</td>
@@ -428,13 +440,13 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
     </tr>
 
     <tr>
-      <td><a href="http://www.gnu.org/software/autoconf">GNU Autoconf</a></td>
+      <td><a href="http://www.gnu.org/software/autoconf/">GNU Autoconf</a></td>
       <td>2.60</td>
       <td>Configuration script builder<sup><a href="#sf4">4</a></sup></td>
     </tr>
 
     <tr>
-      <td><a href="http://www.gnu.org/software/automake">GNU Automake</a></td>
+      <td><a href="http://www.gnu.org/software/automake/">GNU Automake</a></td>
       <td>1.9.6</td>
       <td>aclocal macro generator<sup><a href="#sf4">4</a></sup></td>
     </tr>
@@ -496,11 +508,11 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="brokengcc">Broken versions of GCC and other tools</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM is very demanding of the host C++ compiler, and as such tends to expose
 bugs in the compiler.  In particular, several versions of GCC crash when trying
@@ -593,15 +605,15 @@ upgrading to a newer version of Gold.</p>
 
 </div>
 
-
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="starting"><b>Getting Started with LLVM</b></a>
-</div>
+<h2>
+  <a name="starting">Getting Started with LLVM</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The remainder of this guide is meant to get you up and running with
 LLVM and to give you some basic information about the LLVM environment.</p>
@@ -611,14 +623,13 @@ href="#layout">general layout</a> of the the LLVM source tree, a <a
 href="#tutorial">simple example</a> using the LLVM tool chain, and <a
 href="#links">links</a> to find more information about LLVM or to get
 help via e-mail.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="terminology">Terminology and Notation</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Throughout this manual, the following names are used to denote paths
 specific to the local system and working environment.  <i>These are not
@@ -651,11 +662,11 @@ All these paths are absolute:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="environment">Setting Up Your Environment</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 In order to compile and use LLVM, you may need to set some environment
@@ -674,11 +685,11 @@ variables.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="unpack">Unpacking the LLVM Archives</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 If you have the LLVM distribution, you will need to unpack it before you
@@ -708,11 +719,11 @@ compressed with the gzip program.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="checkout">Checkout LLVM from Subversion</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If you have access to our Subversion repository, you can get a fresh copy of
 the entire source code.  All you need to do is check it out from Subversion as
@@ -736,6 +747,7 @@ revision), you can checkout it from the '<tt>tags</tt>' directory (instead of
 subdirectories of the '<tt>tags</tt>' directory:</p>
 
 <ul>
+<li>Release 2.9: <b>RELEASE_29/final</b></li>
 <li>Release 2.8: <b>RELEASE_28</b></li>
 <li>Release 2.7: <b>RELEASE_27</b></li>
 <li>Release 2.6: <b>RELEASE_26</b></li>
@@ -778,30 +790,30 @@ instructions</a> to successfully get and build the LLVM GCC front-end.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="git_mirror">GIT mirror</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>GIT mirrors are available for a number of LLVM subprojects. These mirrors
   sync automatically with each Subversion commit and contain all necessary
   git-svn marks (so, you can recreate git-svn metadata locally). Note that right
   now mirrors reflect only <tt>trunk</tt> for each project. You can do the
-  read-only GIT clone of LLVM via: 
+  read-only GIT clone of LLVM via:</p>
+
 <pre>
 % git clone http://llvm.org/git/llvm.git
 </pre>
-</p>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="installcf">Install the GCC Front End</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Before configuring and compiling the LLVM suite (or if you want to use just the LLVM
 GCC front end) you can optionally extract the front end from the binary distribution.
@@ -810,7 +822,7 @@ you can optionally <a href="GCCFEBuildInstrs.html">build llvm-gcc yourself</a> a
 main LLVM repository.</p>
 
 <p>To install the GCC front end, do the following (on Windows, use an archival tool
-like <a href="http://www.7-zip.org">7-zip</a> that understands gzipped tars):</p>
+like <a href="http://www.7-zip.org/">7-zip</a> that understands gzipped tars):</p>
 
 <ol>
   <li><tt>cd <i>where-you-want-the-front-end-to-live</i></tt></li>
@@ -867,11 +879,11 @@ please let us know how you would like to see things improved by dropping us a no
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="config">Local LLVM Configuration</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
   <p>Once checked out from the Subversion repository, the LLVM suite source 
   code must be
@@ -989,11 +1001,11 @@ script to configure the build system:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="compile">Compiling the LLVM Suite Source Code</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Once you have configured LLVM, you can build it.  There are three types of
 builds:</p>
@@ -1123,11 +1135,11 @@ that directory that is out of date.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="cross-compile">Cross-Compiling LLVM</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>It is possible to cross-compile LLVM itself. That is, you can create LLVM
   executables and libraries to be hosted on a platform different from the
   platform where they are build (a Canadian Cross build). To configure a
@@ -1141,11 +1153,11 @@ that directory that is out of date.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="objfiles">The Location of LLVM Object Files</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM build system is capable of sharing a single LLVM source tree among
 several LLVM builds.  Hence, it is possible to build LLVM for several different
@@ -1201,11 +1213,11 @@ named after the build type:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="optionalconfig">Optional Configuration Items</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 If you're running on a Linux system that supports the "<a
@@ -1225,7 +1237,7 @@ $ ./hello.bc
 
 <p>
 This allows you to execute LLVM bitcode files directly.  On Debian, you 
-can also use this command instead of the 'echo' command above:</p>
+can also use this command instead of the 'echo' command above:
 </p>
 
 <div class="doc_code">
@@ -1236,31 +1248,37 @@ $ sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
 
 </div>
 
-<!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="layout"><b>Program Layout</b></a>
 </div>
+
+<!-- *********************************************************************** -->
+<h2>
+  <a name="layout">Program Layout</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>One useful source of information about the LLVM source base is the LLVM <a
-href="http://www.doxygen.org">doxygen</a> documentation available at <tt><a
+href="http://www.doxygen.org/">doxygen</a> documentation available at <tt><a
 href="http://llvm.org/doxygen/">http://llvm.org/doxygen/</a></tt>.
 The following is a brief introduction to code layout:</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="examples"><tt>llvm/examples</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="examples"><tt>llvm/examples</tt></a>
+</h3>
+
+<div>
   <p>This directory contains some simple examples of how to use the LLVM IR and
   JIT.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="include"><tt>llvm/include</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="include"><tt>llvm/include</tt></a>
+</h3>
+
+<div>
 
 <p>This directory contains public header files exported from the LLVM
 library. The three main subdirectories of this directory are:</p>
@@ -1287,8 +1305,11 @@ library. The three main subdirectories of this directory are:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="lib"><tt>llvm/lib</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="lib"><tt>llvm/lib</tt></a>
+</h3>
+
+<div>
 
 <p>This directory contains most of the source files of the LLVM system. In LLVM,
 almost all code exists in libraries, making it very easy to share code among the
@@ -1327,6 +1348,10 @@ different <a href="#tools">tools</a>.</p>
   <dd> This directory contains the major parts of the code generator: Instruction 
   Selector, Instruction Scheduling, and Register Allocation.</dd>
 
+  <dt><tt><b>llvm/lib/MC/</b></tt></dt>
+  <dd>(FIXME: T.B.D.)</dd>
+
+  <!--FIXME: obsoleted -->
   <dt><tt><b>llvm/lib/Debugger/</b></tt></dt>
   <dd> This directory contains the source level debugger library that makes 
   it possible to instrument LLVM programs so that a debugger could identify 
@@ -1340,6 +1365,7 @@ different <a href="#tools">tools</a>.</p>
   <dd> This directory contains the source code that corresponds to the header 
   files located in <tt>llvm/include/Support/</tt>.</dd>
 
+  <!--FIXME: obsoleted -->
   <dt><tt><b>llvm/lib/System/</b></tt></dt>
   <dd>This directory contains the operating system abstraction layer that
   shields LLVM from platform-specific coding.</dd>
@@ -1348,8 +1374,11 @@ different <a href="#tools">tools</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="projects"><tt>llvm/projects</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="projects"><tt>llvm/projects</tt></a>
+</h3>
+
+<div>
   <p>This directory contains projects that are not strictly part of LLVM but are
   shipped with LLVM. This is also the directory where you should create your own
   LLVM-based projects. See <tt>llvm/projects/sample</tt> for an example of how
@@ -1357,8 +1386,11 @@ different <a href="#tools">tools</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="runtime"><tt>llvm/runtime</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="runtime"><tt>llvm/runtime</tt></a>
+</h3>
+
+<div>
 
 <p>This directory contains libraries which are compiled into LLVM bitcode and
 used when linking programs with the GCC front end.  Most of these libraries are
@@ -1371,16 +1403,22 @@ end to compile.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="test"><tt>llvm/test</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="test"><tt>llvm/test</tt></a>
+</h3>
+
+<div>
   <p>This directory contains feature and regression tests and other basic sanity
   checks on the LLVM infrastructure. These are intended to run quickly and cover
   a lot of territory without being exhaustive.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="test-suite"><tt>test-suite</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="test-suite"><tt>test-suite</tt></a>
+</h3>
+
+<div>
   <p>This is not a directory in the normal llvm module; it is a separate
   Subversion
   module that must be checked out (usually to <tt>projects/test-suite</tt>). 
@@ -1395,8 +1433,11 @@ end to compile.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="tools"><tt>llvm/tools</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="tools"><tt>llvm/tools</tt></a>
+</h3>
+
+<div>
 
 <p>The <b>tools</b> directory contains the executables built out of the
 libraries above, which form the main part of the user interface.  You can
@@ -1480,8 +1521,11 @@ information is in the <a href="CommandGuide/index.html">Command Guide</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="utils"><tt>llvm/utils</tt></a></div>
-<div class="doc_text">
+<h3>
+  <a name="utils"><tt>llvm/utils</tt></a>
+</h3>
+
+<div>
 
 <p>This directory contains utilities for working with LLVM source code, and some
 of the utilities are actually required as part of the build process because they
@@ -1542,13 +1586,15 @@ are code generators for parts of LLVM infrastructure.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="tutorial">An Example Using the LLVM Tool Chain</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>This section gives an example of using LLVM.  llvm-gcc3 is now obsolete,
 so we only include instructions for llvm-gcc4.
 </p>
@@ -1559,12 +1605,13 @@ create bitcode by default: <i>gcc4</i> produces native code. As the example belo
 the '--emit-llvm' flag is needed to produce LLVM bitcode output. For <i>makefiles</i> and
 <i>configure</i> scripts, the CFLAGS variable needs '--emit-llvm' to produce bitcode
 output.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="tutorial4">Example with llvm-gcc4</a></div>
+<h3>
+  <a name="tutorial4">Example with llvm-gcc4</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ol>
   <li><p>First, create a simple C file, name it 'hello.c':</p>
@@ -1645,14 +1692,15 @@ int main() {
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="problems">Common Problems</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you are having problems building or using LLVM, or if you have any other
 general questions about LLVM, please consult the <a href="FAQ.html">Frequently
@@ -1661,12 +1709,12 @@ Asked Questions</a> page.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="links">Links</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is just an <b>introduction</b> on how to use LLVM to do
 some simple things... there are many more interesting and complicated things
@@ -1694,8 +1742,8 @@ out:</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-01 21:08:28 +0100 (Tue, 01 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
index 7c0bf00..6a60433 100644
--- a/docs/GettingStartedVS.html
+++ b/docs/GettingStartedVS.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   Getting Started with the LLVM System using Microsoft Visual Studio
-</div>
+</h1>
 
 <ul>
   <li><a href="#overview">Overview</a>
@@ -26,19 +26,17 @@
 </ul>
 
 <div class="doc_author">
-  <p>Written by:
-    <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a>
-  </p>
+  <p>Written by: <a href="http://llvm.org/">The LLVM Team</a></p>
 </div>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="overview"><b>Overview</b></a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p>Welcome to LLVM on Windows! This document only covers LLVM on Windows using
   Visual Studio, not mingw or cygwin. In order to get started, you first need to
@@ -72,25 +70,23 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="requirements"><b>Requirements</b></a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
   <p>Before you begin to use the LLVM system, review the requirements given
   below.  This may save you some trouble by knowing ahead of time what hardware
   and software you will need.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="hardware"><b>Hardware</b></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
   <p>Any system that can adequately run Visual Studio .NET 2005 SP1 is fine.
   The LLVM source tree and object files, libraries and executables will consume
@@ -99,8 +95,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="software"><b>Software</b></a></div>
-<div class="doc_text">
+<h3><a name="software"><b>Software</b></a></h3>
+<div>
 
   <p>You will need Visual Studio .NET 2005 SP1 or higher.  The VS2005 SP1
   beta and the normal VS2005 still have bugs that are not completely
@@ -120,13 +116,15 @@
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="quickstart"><b>Getting Started</b></a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Here's the short story for getting up and running quickly with LLVM:</p>
 
@@ -196,7 +194,9 @@
   <ul>
     <li>If %PATH% does not contain GnuWin32, you may specify LLVM_LIT_TOOLS_DIR
     on CMake for the path to GnuWin32.</li>
-    <li>You can run LLVM tests to build the project "check".</li>
+    <li>You can run LLVM tests by merely building the project
+      "check". The test results will be shown in the VS output
+      window.</li>
   </ul>
   </li>
 
@@ -215,25 +215,26 @@
     <p>Note that quite a few of these test will fail.</p>
     </li>
 
-    <li>A specific test or test directory can be run with:</li>
+    <li>A specific test or test directory can be run with:
 
 <div class="doc_code">
 <pre>
 % llvm-lit test/path/to/test
 </pre>
 </div>
-
+    </li>
+  </ul>
 </ol>
 
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="tutorial">An Example Using the LLVM Tool Chain</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <ol>
   <li><p>First, create a simple C file, name it 'hello.c':</p>
@@ -318,12 +319,12 @@ int main() {
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="problems">Common Problems</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you are having problems building or using LLVM, or if you have any other
 general questions about LLVM, please consult the <a href="FAQ.html">Frequently
@@ -332,12 +333,12 @@ Asked Questions</a> page.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="links">Links</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is just an <b>introduction</b> to how to use LLVM to do
 some simple things... there are many more interesting and complicated things
@@ -361,9 +362,8 @@ out:</p>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-20 16:34:12 +0100 (Sun, 20 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 68c5cf1..e25c457 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -7,7 +7,7 @@
 </head>
 <body>
       
-<div class="doc_title">LLVM gold plugin</div>
+<h1>LLVM gold plugin</h1>
 <ol>
   <li><a href="#introduction">Introduction</a></li>
   <li><a href="#build">How to build it</a></li>
@@ -21,9 +21,9 @@
 <div class="doc_author">Written by Nick Lewycky</div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>Building with link time optimization requires cooperation from the
 system linker. LTO support on Linux systems requires that you use
 the <a href="http://sourceware.org/binutils">gold linker</a> which supports
@@ -33,14 +33,14 @@ project.</p>
   <p>The LLVM gold plugin implements the
 <a href="http://gcc.gnu.org/wiki/whopr/driver">gold plugin interface</a>
 on top of
-<a href="http://llvm.org/docs/LinkTimeOptimization.html#lto">libLTO</a>.
+<a href="LinkTimeOptimization.html#lto">libLTO</a>.
 The same plugin can also be used by other tools such as <tt>ar</tt> and
 <tt>nm</tt>.
 </div>
 <!--=========================================================================-->
-<div class="doc_section"><a name="build">How to build it</a></div>
+<h2><a name="build">How to build it</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>You need to have gold with plugin support and build the LLVMgold
 plugin. Check whether you have gold running <tt>/usr/bin/ld -v</tt>. It will
 report &#8220;GNU gold&#8221; or else &#8220GNU ld&#8221; if not. If you have
@@ -72,9 +72,9 @@ placed.
 </ul>
 </div>
 <!--=========================================================================-->
-<div class="doc_section"><a name="usage">Usage</a></div>
+<h2><a name="usage">Usage</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>The linker takes a <tt>-plugin</tt> option that points to the path of
   the plugin <tt>.so</tt> file. To find out what link command <tt>gcc</tt>
   would run in a given situation, run <tt>gcc -v <em>[...]</em></tt> and look
@@ -95,14 +95,13 @@ placed.
   own gold, be sure to install the <tt>ar</tt> and <tt>nm-new</tt> you built to
   <tt>/usr/bin</tt>.
   <p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="example1">Example of link time optimization</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>The following example shows a worked example of the gold plugin mixing
   LLVM bitcode and native code.
 <pre class="doc_code">
@@ -145,14 +144,20 @@ $ llvm-gcc -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
 </pre>
   <p>Gold informs the plugin that foo3 is never referenced outside the IR,
   leading LLVM to delete that function. However, unlike in the
-  <a href="http://llvm.org/docs/LinkTimeOptimization.html#example1">libLTO
+  <a href="LinkTimeOptimization.html#example1">libLTO
   example</a> gold does not currently eliminate foo4.</p>
 </div>
 
+</div>
+
 <!--=========================================================================-->
-<div class="doc_section"><a name="lto_autotools">Quickstart for using LTO with autotooled projects</a></div>
+<h2>
+  <a name="lto_autotools">
+    Quickstart for using LTO with autotooled projects
+  </a>
+</h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>Once your system <tt>ld</tt>, <tt>ar</tt> and <tt>nm</tt> all support LLVM
   bitcode, everything is in place for an easy to use LTO build of autotooled
   projects:</p>
@@ -189,9 +194,9 @@ export CFLAGS="-O4"
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="licensing">Licensing</a></div>
+<h2><a name="licensing">Licensing</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>Gold is licensed under the GPLv3. LLVMgold uses the interface file
 <tt>plugin-api.h</tt> from gold which means that the resulting LLVMgold.so
 binary is also GPLv3. This can still be used to link non-GPLv3 programs just
@@ -206,7 +211,7 @@ as much as gold could without the plugin.</p>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="mailto:nicholas@metrix.on.ca">Nick Lewycky</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
   Last modified: $Date: 2010-04-16 23:58:21 -0800 (Fri, 16 Apr 2010) $
 </address>
 </body>
diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
index 1c725f5..81ca539 100644
--- a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
+++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
@@ -60,11 +60,11 @@ Understood.  :)
 
 Yup, I think that this makes a lot of sense.  I am still intrigued,
 however, by the prospect of a minimally allocated VM representation... I
-think that it could have definate advantages for certain applications
+think that it could have definite advantages for certain applications
 (think very small machines, like PDAs).  I don't, however, think that our
 initial implementations should focus on this.  :)
 
-Here are some other auxilliary goals that I think we should consider:
+Here are some other auxiliary goals that I think we should consider:
 
 1. Primary goal: Support a high performance dynamic compilation
    system.  This means that we have an "ideal" division of labor between
diff --git a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
index b66e185..01b644b 100644
--- a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
+++ b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
@@ -40,7 +40,7 @@ IDEAS TO CONSIDER
    packaged with the bytecodes themselves.  As a conceptual implementation 
    idea, we could include an immediate dominator number for each basic block
    in the LLVM bytecode program.  Basic blocks could be numbered according
-   to the order of occurance in the bytecode representation.
+   to the order of occurrence in the bytecode representation.
 
 2. Including loop header and body information.  This would facilitate
    detection of intervals and natural loops.
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
index 7b90327..8397324 100644
--- a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
+++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
@@ -39,7 +39,7 @@ declaration and calling syntax.
 
 Very true.  If you're implementing an object oriented language, however,
 remember that you have to do all the pointer to member function stuff
-yourself.... so everytime you invoke a virtual method one is involved
+yourself.... so every time you invoke a virtual method one is involved
 (instead of having C++ hide it for you behind "syntactic sugar").
 
 > And the old array syntax:
diff --git a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
index 5c87330..da50263 100644
--- a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
+++ b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
@@ -18,7 +18,7 @@ suggested, as specified below:
 
 Very true.  We should discuss this more, but my reasoning is more of a
 consistency argument.  There are VERY few instructions that can have all
-of the types eliminated, and doing so when available unnecesarily makes
+of the types eliminated, and doing so when available unnecessarily makes
 the language more difficult to handle.  Especially when you see 'int
 %this' and 'bool %that' all over the place, I think it would be
 disorienting to see:
@@ -44,7 +44,7 @@ branches).
 
 No.  This was something I was debating for a while, and didn't really feel
 strongly about either way.  It is common to switch on other types in HLL's
-(for example signed int's are particually common), but in this case, all
+(for example signed int's are particularly common), but in this case, all
 that will be added is an additional 'cast' instruction.  I removed that
 from the spec.
 
@@ -160,7 +160,7 @@ that can be trivally translated into a conditional move...
 > I agree that we need a static data space.  Otherwise, emulating global
 > data gets unnecessarily complex.
 
-Definately.  Also a later item though.  :)
+Definitely.  Also a later item though.  :)
 
 > We once talked about adding a symbolic thread-id field to each
 > ..
diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
index 6c9e0971..e61042f 100644
--- a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
+++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
@@ -42,7 +42,7 @@ Does using GCC's backend buy us anything?
 > optimization (step 16 in your list).  Do you have a breakdown of that?
 
 Not really.  The irritating part of GCC is that it mixes it all up and
-doesn't have a clean seperation of concerns.  A lot of the "back end
+doesn't have a clean separation of concerns.  A lot of the "back end
 optimization" happens right along with other data optimizations (ie, CSE
 of machine specific things).
 
diff --git a/docs/HistoricalNotes/2002-05-12-InstListChange.txt b/docs/HistoricalNotes/2002-05-12-InstListChange.txt
index 004edb0..638682b 100644
--- a/docs/HistoricalNotes/2002-05-12-InstListChange.txt
+++ b/docs/HistoricalNotes/2002-05-12-InstListChange.txt
@@ -17,7 +17,7 @@ iterator to an instruction, which, given just an Instruction*, requires a
 linear search of the basic block the instruction is contained in... just 
 to insert an instruction before another instruction, or to delete an 
 instruction!  This complicates algorithms that should be very simple (like 
-simple constant propogation), because they aren't actually sparse anymore, 
+simple constant propagation), because they aren't actually sparse anymore,
 they have to traverse basic blocks to remove constant propogated 
 instructions.
 
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
index 7663321..f52f326 100644
--- a/docs/HowToReleaseLLVM.html
+++ b/docs/HowToReleaseLLVM.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">How To Release LLVM To The Public</div>
+<h1>How To Release LLVM To The Public</h1>
 <ol>
   <li><a href="#introduction">Introduction</a></li>
   <li><a href="#criteria">Qualification Criteria</a></li>
@@ -17,495 +17,597 @@
 <div class="doc_author">
   <p>Written by <a href="mailto:tonic@nondot.org">Tanya Lattner</a>,
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
-  <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>
+  <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>, &amp;
+  <a href="mailto:wendling@apple.com">Bill Wendling</a>
   </p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
-  <p>
-  This document collects information about successfully releasing LLVM 
-  (including subprojects llvm-gcc and Clang) to the public. 
-  It is the release manager's responsibility to ensure that a high quality 
-  build of LLVM is released. 
-  </p>
+<div>
+
+<p>This document contains information about successfully releasing LLVM &mdash;
+   including subprojects: e.g., <tt>llvm-gcc</tt> and <tt>clang</tt> &mdash; to
+   the public. It is the Release Manager's responsibility to ensure that a high
+   quality build of LLVM is released.</p>
+
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="process">Release Timeline</a></div>
+<h2><a name="process">Release Timeline</a></h2>
 <!-- *********************************************************************** -->
-<div class="doc_text">
-  <p>LLVM is released on a time based schedule (currently every 6 months). We
-  do not have dot releases because of the nature of LLVM incremental 
-  development philosophy. The release schedule is roughly as follows:
-  </p>
-<ol>
-<li>Set code freeze and branch creation date for 6 months after last code freeze 
-date. Announce release schedule to the LLVM community and update the website.</li>
-<li>Create release branch and begin release process. </li>
-<li>Send out pre-release for first round of testing. Testing will last 7-10 days.
-During the first round of testing, regressions should be found and fixed. Patches
-are merged from mainline to the release branch.</li>
-<li>Generate and send out second pre-release. Bugs found during this time will
-not be fixed unless absolutely critical. Bugs introduce by patches merged in
-will be fixed and if so, a 3rd round of testing is needed.</li>
-<li>The release notes should be updated during the first and second round of
-pre-release testing.</li>
-<li>Finally, release!</li>
-</ol>
-</div>
+<div>
+
+<p>LLVM is released on a time based schedule &mdash; roughly every 6 months. We
+   do not normally have dot releases because of the nature of LLVM's incremental
+   development philosophy. That said, the only thing preventing dot releases for
+   critical bug fixes from happening is a lack of resources &mdash; testers,
+   machines, time, etc. And, because of the high quality we desire for LLVM
+   releases, we cannot allow for a truncated form of release qualification.</p>
+
+<p>The release process is roughly as follows:</p>
+
+<ul>
+  <li><p>Set code freeze and branch creation date for 6 months after last code
+      freeze date. Announce release schedule to the LLVM community and update
+      the website.</p></li>
+
+  <li><p>Create release branch and begin release process.</p></li>
+
+  <li><p>Send out release candidate sources for first round of testing. Testing
+      lasts 7-10 days. During the first round of testing, any regressions found
+      should be fixed. Patches are merged from mainline into the release
+      branch. Also, all features need to be completed during this time. Any
+      features not completed at the end of the first round of testing will be
+      removed or disabled for the release.</p></li>
 
+  <li><p>Generate and send out the second release candidate sources. Only
+      <em>critial</em> bugs found during this testing phase will be fixed. Any
+      bugs introduced by merged patches will be fixed. If so a third round of
+      testing is needed.</p></li>
+
+  <li><p>The release notes are updated.</p></li>
+
+  <li><p>Finally, release!</p></li>
+</ul>
+
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="process">Release Process</a></div>
+<h2><a name="process">Release Process</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
+
+<ol>
+  <li><a href="#release-admin">Release Administrative Tasks</a>
   <ol>
-    <li><a href="#release-admin">Release Administrative Tasks</a></li>
-    <ol>
     <li><a href="#branch">Create Release Branch</a></li>
     <li><a href="#verchanges">Update Version Numbers</a></li>
-    </ol>
-    <li><a href="#release-build">Building the Release</a></li>
-    <ol>
+  </ol>
+  </li>
+  <li><a href="#release-build">Building the Release</a>
+  <ol>
     <li><a href="#dist">Build the LLVM Source Distributions</a></li>
     <li><a href="#build">Build LLVM</a></li>
     <li><a href="#llvmgccbin">Build the LLVM-GCC Binary Distribution</a></li>
     <li><a href="#clangbin">Build the Clang Binary Distribution</a></li>
     <li><a href="#target-build">Target Specific Build Details</a></li>
-    </ol>
-    
-    <li><a href="#release-qualify">Release Qualification Criteria</a></li>
-    <ol>
+  </ol>
+  </li>
+  <li><a href="#release-qualify">Release Qualification Criteria</a>
+  <ol>
     <li><a href="#llvm-qualify">Qualify LLVM</a></li>
     <li><a href="#llvmgcc-qualify">Qualify LLVM-GCC</a></li>
     <li><a href="#clang-qualify">Qualify Clang</a></li>
     <li><a href="#targets">Specific Target Qualification Details</a></li>
-    </ol>
-    
-    <li><a href="#commTest">Community Testing</a></li>    
-    <li><a href="#release-patch">Release Patch Rules</a></li>
-
-    
-    <li><a href="#release-final">Release final tasks</a></li>
-    <ol>
+  </ol>
+  </li>
+
+  <li><a href="#commTest">Community Testing</a></li>    
+  <li><a href="#release-patch">Release Patch Rules</a></li>
+  <li><a href="#release-final">Release final tasks</a>
+  <ol>
     <li><a href="#updocs">Update Documentation</a></li>
-    <li><a href="#tag">Tag the LLVM Release Branch</a></li>
+    <li><a href="#tag">Tag the LLVM Final Release</a></li>
     <li><a href="#updemo">Update the LLVM Demo Page</a></li>
     <li><a href="#webupdates">Update the LLVM Website</a></li>
     <li><a href="#announce">Announce the Release</a></li>
-    </ol>
-    
   </ol>
-</div>
+  </li>
+</ol>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="release-admin">
-Release Administrative Tasks</a></div>
-
-<div class="doc_text">
-This section describes a few administrative tasks that need to be done for the
-release process to begin. Specifically, it involves creating the release branch,
- resetting version numbers, and creating the release tarballs for the release 
- team to begin testing.
-</div>
+<h3><a name="release-admin">Release Administrative Tasks</a></h3>
+
+<div>
+
+<p>This section describes a few administrative tasks that need to be done for
+   the release process to begin. Specifically, it involves:</p>
+
+<ul>
+  <li>Creating the release branch,</li>
+  <li>Setting version numbers, and</li>
+  <li>Tagging release candidates for the release team to begin testing</li>
+</ul>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="branch">Create Release Branch</a></div>
-<div class="doc_text">
-<p>Branch the Subversion HEAD using the following procedure:</p>
-  <ol>
-    <li>
-    <p>Verify that the current Subversion HEAD is in decent shape by examining 
-    nightly tester or buildbot results.</p></li>
-    <li>
-    <p>Request all developers to refrain from committing. Offenders get commit
-    rights taken away (temporarily).</p></li>
-  <li>
-  <p> Create the release branch for <tt>llvm</tt>, <tt>llvm-gcc4.2</tt>, 
-  <tt>clang</tt>, and the <tt>test-suite</tt>. The branch name will be 
-  <tt>release_XX</tt>,where <tt>XX</tt> is the major and minor release numbers.
-  <tt>Clang</tt> will have a different release number than <tt>llvm</tt>/
-  <tt>llvm-gcc4</tt> since its first release was years later 
-  (still deciding if this will be true or not). These branches 
-  can be created without checking out anything from subversion.
-  </p>
+<h4><a name="branch">Create Release Branch</a></h4>
+
+<div>
+
+<p>Branch the Subversion trunk using the following procedure:</p>
+
+<ol>
+  <li><p>Remind developers that the release branching is imminent and to refrain
+      from committing patches that might break the build. E.g., new features,
+      large patches for works in progress, an overhaul of the type system, an
+      exciting new TableGen feature, etc.</p></li>
+
+  <li><p>Verify that the current Subversion trunk is in decent shape by
+      examining nightly tester and buildbot results.</p></li>
+
+  <li><p>Create the release branch for <tt>llvm</tt>, <tt>llvm-gcc-4.2</tt>,
+      <tt>clang</tt>, and the <tt>test-suite</tt> from the last known good
+      revision. The branch's name is <tt>release_XY</tt>, where <tt>X</tt> is
+      the major and <tt>Y</tt> the minor release numbers. The branches should be
+      created using the following commands:</p>
   
-  <div class="doc_code">
+<div class="doc_code">
 <pre>
-svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
-         https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
-         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
-         https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
-         https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i>
+$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
+           https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i>
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XY</i>
+
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
+           https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i>
+
+$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
+           https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i>
 </pre>
-  </div>
+</div></li>
 
-  <li>
-    <p>Advise developers they can work on Subversion HEAD again.</p></li>
-  
-  <li>
-    <p>The Release Manager should switch to the release branch (as all changes 
-    to the release will now be done in the branch).  The easiest way to do this 
-    is to grab another working copy using the following commands:</p>
+  <li><p>Advise developers that they may now check their patches into the
+      Subversion tree again.</p></li>
+
+  <li><p>The Release Manager should switch to the release branch, because all
+      changes to the release will now be done in the branch. The easiest way to
+      do this is to grab a working copy using the following commands:</p>
 
 <div class="doc_code">
 <pre>
-svn co https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XX</i>
-svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i>
-svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i>
-svn co https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i>
+$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> llvm-<i>X.Y</i>
+
+$ svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XY</i> llvm-gcc-4.2-<i>X.Y</i>
+
+$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> test-suite-<i>X.Y</i>
+
+$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> clang-<i>X.Y</i>
 </pre>
 </div></li>
+</ol>
 
-  </ol>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="verchanges">Update LLVM Version</a></div>
-<div class="doc_text">
-  <p>
-  After creating the LLVM release branch, update the release branches'
-  autoconf/configure.ac version from X.Xsvn to just X.X. Update it on mainline
-  as well to be the next version (X.X+1svn). Regenerated the configure script
-  for both. This must be done for both <tt>llvm</tt> and the 
-  <tt>test-suite</tt>.
-  </p>
-  <p>FIXME: Add a note about <tt>clang</tt>.</p>
-  <p>In addition, the version number of all the Bugzilla components must be
-  updated for the next release.
-  </p>
+<h4><a name="verchanges">Update LLVM Version</a></h4>
+
+<div>
+
+<p>After creating the LLVM release branch, update the release branches'
+   <tt>autoconf</tt> and <tt>configure.ac</tt> versions from '<tt>X.Ysvn</tt>'
+   to '<tt>X.Y</tt>'. Update it on mainline as well to be the next version
+   ('<tt>X.Y+1svn</tt>'). Regenerate the configure scripts for both
+   <tt>llvm</tt> and the <tt>test-suite</tt>.</p>
+
+<p>In addition, the version numbers of all the Bugzilla components must be
+   updated for the next release.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="dist">Build the LLVM Source Distributions</a></div>
-<div class="doc_text">
-  <p>
-  Create source distributions for <tt>LLVM</tt>, <tt>LLVM-GCC</tt>,  
-  <tt>clang</tt>, and the llvm <tt>test-suite</tt> by exporting the source from 
-  Subversion and archiving it.  This can be done with the following commands:
-  </p>
+<h4><a name="dist">Build the LLVM Release Candidates</a></h4>
+
+<div>
+
+<p>Create release candidates for <tt>llvm</tt>, <tt>llvm-gcc</tt>,
+   <tt>clang</tt>, and the LLVM <tt>test-suite</tt> by tagging the branch with
+   the respective release candidate number. For instance, to create <b>Release
+   Candidate 1</b> you would issue the following commands:</p>
 
 <div class="doc_code">
 <pre>
-svn export https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XX</i> llvm-X.X
-svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XX</i> llvm-gcc4.2-X.X.source
-svn export https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XX</i> llvm-test-X.X
-svn export https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XX</i> clang-X.X
-tar -czvf - llvm-X.X          | gzip &gt; llvm-X.X.tar.gz
-tar -czvf - llvm-test-X.X     | gzip &gt; llvm-test-X.X.tar.gz
-tar -czvf - llvm-gcc4.2-X.X.source | gzip &gt; llvm-gcc-4.2-X.X.source.tar.gz
-tar -czvf - clang-X.X | gzip &gt; clang-X.X.tar.gz
+$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>
+$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> \
+           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XY</i>
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_<i>XY</i> \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XY</i>/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> \
+           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>
+$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> \
+           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1
 </pre>
 </div>
+
+<p>Similarly, <b>Release Candidate 2</b> would be named <tt>RC2</tt> and so
+   on. This keeps a permanent copy of the release candidate around for people to
+   export and build as they wish. The final released sources will be tagged in
+   the <tt>RELEASE_<i>XY</i></tt> directory as <tt>Final</tt>
+   (c.f. <a href="#tag">Tag the LLVM Final Release</a>).</p>
+
+<p>The Release Manager may supply pre-packaged source tarballs for users. This
+   can be done with the following commands:</p>
+
+<div class="doc_code">
+<pre>
+$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1 llvm-<i>X.Y</i>rc1
+$ svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XY</i>/rc1 llvm-gcc4.2-<i>X.Y</i>rc1
+$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1 llvm-test-<i>X.Y</i>rc1
+$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1 clang-<i>X.Y</i>rc1
+
+$ tar -cvf - llvm-<i>X.Y</i>rc1        | gzip &gt; llvm-<i>X.Y</i>rc1.src.tar.gz
+$ tar -cvf - llvm-test-<i>X.Y</i>rc1   | gzip &gt; llvm-test-<i>X.Y</i>rc1.src.tar.gz
+$ tar -cvf - llvm-gcc4.2-<i>X.Y</i>rc1 | gzip &gt; llvm-gcc-4.2-<i>X.Y</i>rc1.src.tar.gz
+$ tar -cvf - clang-<i>X.Y</i>rc1       | gzip &gt; clang-<i>X.Y</i>rc1.src.tar.gz
+</pre>
+</div>
+
+</div>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="release-build">
-Building the Release</a></div>
+<h3><a name="release-build">Building the Release</a></h3>
 
-<div class="doc_text">
-The build of <tt>llvm</tt>, <tt>llvm-gcc</tt>, and <tt>clang</tt> must be free
-of errors and warnings in both debug, release+asserts, and release builds. 
-If all builds are clean, then the release passes build qualification.
+<div>
 
-<ol>
-<li>debug: ENABLE_OPTIMIZED=0</li>
-<li>release+asserts: ENABLE_OPTIMIZED=1</li>
-<li>release: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</li>
-</ol>
-</div>
+<p>The builds of <tt>llvm</tt>, <tt>llvm-gcc</tt>, and <tt>clang</tt>
+   <em>must</em> be free of errors and warnings in Debug, Release+Asserts, and
+   Release builds. If all builds are clean, then the release passes Build
+   Qualification.</p>
+
+<p>The <tt>make</tt> options for building the different modes:</p>
+
+<table>
+  <tr><th>Mode</th><th>Options</th></tr>
+  <tr align="left"><td>Debug</td><td><tt>ENABLE_OPTIMIZED=0</tt></td></tr>
+  <tr align="left"><td>Release+Asserts</td><td><tt>ENABLE_OPTIMIZED=1</tt></td></tr>
+  <tr align="left"><td>Release</td><td><tt>ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</tt></td></tr>
+</table>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="build">Build LLVM</a></div>
-<div class="doc_text">
-  <p>
-  Build both debug, release+asserts (optimized), and release versions of 
-  LLVM on all supported platforms. Direction to build llvm are 
-  <a href="http://llvm.org/docs/GettingStarted.html#quickstart">here</a>.
-  </p>
+<h4><a name="build">Build LLVM</a></h4>
+
+<div>
+
+<p>Build <tt>Debug</tt>, <tt>Release+Asserts</tt>, and <tt>Release</tt> versions
+   of <tt>llvm</tt> on all supported platforms. Directions to build
+   <tt>llvm</tt> are
+   <a href="GettingStarted.html#quickstart">here</a>.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="llvmgccbin">Build the LLVM GCC Binary Distribution</a></div>
-<div class="doc_text">
-  <p>
-  Creating the LLVM GCC binary distribution (release/optimized) requires 
-  performing the following steps for each supported platform:
-  </p>
+<h4><a name="llvmgccbin">Build the LLVM GCC Binary Distribution</a></h4>
+
+<div>
+
+<p>Creating the <tt>llvm-gcc</tt> binary distribution (Release/Optimized)
+   requires performing the following steps for each supported platform:</p>
+
+<ol>
+  <li><p>Build the <tt>llvm-gcc</tt> front-end by following the directions in
+      the <tt>README.LLVM</tt> file. The front-end must be compiled with C, C++,
+      Objective-C (Mac only), Objective-C++ (Mac only), and Fortran
+      support.</p></li>
+
+  <li><p>Boostrapping must be enabled.</p></li>
+
+  <li><p>Be sure to build with <tt>LLVM_VERSION_INFO=X.Y</tt>, where <tt>X</tt>
+      is the major and <tt>Y</tt> is the minor release numbers.</p></li>
+
+  <li><p>Copy the installation directory to a directory named for the specific
+      target. For example on Red Hat Enterprise Linux, the directory would be
+      named <tt>llvm-gcc4.2-2.6-x86-linux-RHEL4</tt>. Archive and compress the
+      new directory.</p></li>
+</ol>
 
-  <ol>
-    <li>
-    Build the LLVM GCC front-end by following the directions in the README.LLVM
-    file. The frontend must be compiled with c, c++, objc (mac only), 
-    objc++ (mac only) and fortran support. </li>
-    <li>Please boostrap as well.</li>
-    <li>Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and
-    minor release numbers.
-    </li>
-
-    <li>
-    Copy the installation directory to a directory named for the specific target.
-    For example on Red Hat Enterprise Linux, the directory would be named
-    <tt>llvm-gcc4.2-2.6-x86-linux-RHEL4</tt>. Archive and compress the new directory.  
-    </li>
-  </ol>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="clangbin">Build Clang 
-Binary Distribution</a></div>
-<div class="doc_text">
-  <p>
-  Creating the Clang binary distribution (debug/release/release) requires
-  performing the following steps for each supported platform:
-  </p>
+<h4><a name="clangbin">Build Clang Binary Distribution</a></h4>
 
-  <ol>
-    <li>
-    Build clang according to the directions 
-    <a href="http://clang.llvm.org/get_started.html">here</a>.
-    </li>
-    
-    <li>Build both a debug and release version of clang, but the binary
-    will be a release build.</lI>
-
-    <li>
-    Package clang (details to follow).
-    </li>
-  </ol>
-</div>
+<div>
+
+<p>Creating the <tt>clang</tt> binary distribution
+   (Debug/Release+Asserts/Release) requires performing the following steps for
+   each supported platform:</p>
+
+<ol>
+  <li>Build clang according to the directions
+      <a href="http://clang.llvm.org/get_started.html">here</a>.</li>
+
+  <li>Build both a debug and release version of clang. The binary will be the
+      release build.</lI>
+
+  <li>Package <tt>clang</tt> (details to follow).</li>
+</ol>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="target-build">Target Specific Build 
-Details</a></div>
-<div class="doc_text">
-  <p>
-  The table below specifies which compilers are used for each arch/os combination
-  when qualifying the build of <tt>llvm</tt>, <tt>llvm-gcc</tt>, <tt>clang.
-  </tt></p>
-  
-  <p>
-  <table>
+<h4><a name="target-build">Target Specific Build Details</a></h4>
+
+<div>
+
+<p>The table below specifies which compilers are used for each Arch/OS
+   combination when qualifying the build of <tt>llvm</tt>, <tt>llvm-gcc</tt>,
+   and <tt>clang</tt>.</p>
+
+<table>
   <tr><th>Architecture</th><th>OS</th><th>compiler</th></tr>
   <tr><td>x86-32</td><td>Mac OS 10.5</td><td>gcc 4.0.1</td></tr>
   <tr><td>x86-32</td><td>Linux</td><td>gcc 4.2.X, gcc 4.3.X</td></tr>
   <tr><td>x86-32</td><td>FreeBSD</td><td>gcc 4.2.X</td></tr>
-   <tr><td>x86-32</td><td>mingw</td><td>gcc 3.4.5</td></tr>
+  <tr><td>x86-32</td><td>mingw</td><td>gcc 3.4.5</td></tr>
   <tr><td>x86-64</td><td>Mac OS 10.5</td><td>gcc 4.0.1</td></tr>
   <tr><td>x86-64</td><td>Linux</td><td>gcc 4.2.X, gcc 4.3.X</td></tr>
   <tr><td>x86-64</td><td>FreeBSD</td><td>gcc 4.2.X</td></tr>
- 
-  </table> 
-  </p>
+</table> 
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="release-qualify">
-Building the Release</a></div>
-
-<div class="doc_text">
- A release is qualified when it has no regressions from the previous 
- release (or baseline). Regressions are related to correctness only and not 
- performance at this time. <b>Regressions are new failures in the set of tests that
- are used to qualify each product and only include things on the list. 
- Ultimately, there is no end to the number of possible bugs in a release.  We 
- need a very concrete and definitive release criteria that ensures we have 
- monotonically improving quality on some metric.  The metric we use is 
- described below.  This doesn't mean that we don't care about other things, 
- but this are things that must be satisfied before a release can go out</b>
-</div>
+<h3><a name="release-qualify">Building the Release</a></h3>
 
+<div>
+
+<p>A release is qualified when it has no regressions from the previous release
+   (or baseline). Regressions are related to correctness first and performance
+   second. (We may tolerate some minor performance regressions if they are
+   deemed necessary for the general quality of the compiler.)</p>
+
+<p><b>Regressions are new failures in the set of tests that are used to qualify
+   each product and only include things on the list. Every release will have
+   some bugs in it. It is the reality of developing a complex piece of
+   software. We need a very concrete and definitive release criteria that
+   ensures we have monotonically improving quality on some metric. The metric we
+   use is described below. This doesn't mean that we don't care about other
+   criteria, but these are the criteria which we found to be most important and
+   which must be satisfied before a release can go out</b></p>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="llvm-qualify">Qualify LLVM</a></div>
-<div class="doc_text">
-  <p>
-  LLVM is qualified when it has a clean dejagnu test run without a frontend and 
-  it has no regressions when using either <tt>llvm-gcc</tt> or <tt>clang</tt> 
-  with the <tt>test-suite</tt> from the previous release.
-</p>
+<h4><a name="llvm-qualify">Qualify LLVM</a></h4>
+
+<div>
+
+<p>LLVM is qualified when it has a clean test run without a front-end. And it
+   has no regressions when using either <tt>llvm-gcc</tt> or <tt>clang</tt> with
+   the <tt>test-suite</tt> from the previous release.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="llvmgcc-qualify">Qualify LLVM-GCC</a></div>
-<div class="doc_text">
-  <p>
-  <tt>LLVM-GCC</tt> is qualified when front-end specific tests in the 
-  <tt>llvm</tt> dejagnu test suite all pass and there are no regressions in 
-  the <tt>test-suite</tt>.</p>
-  <p>We do not use the gcc dejagnu test suite as release criteria.</p>
+<h4><a name="llvmgcc-qualify">Qualify LLVM-GCC</a></h4>
+
+<div>
+
+<p><tt>LLVM-GCC</tt> is qualified when front-end specific tests in the
+   <tt>llvm</tt> regression test suite all pass and there are no regressions in
+   the <tt>test-suite</tt>.</p>
+
+<p>We do not use the GCC DejaGNU test suite as release criteria.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="clang-qualify">Qualify Clang</a></div>
-<div class="doc_text">
-    <tt>Clang</tt> is qualified when front-end specific tests in the 
-  <tt>llvm</tt> dejagnu test suite all pass, clang's own test suite passes 
-  cleanly, and there are no regressions in the <tt>test-suite</tt>.</p>
+<h4><a name="clang-qualify">Qualify Clang</a></h4>
+
+<div>
+
+<p><tt>Clang</tt> is qualified when front-end specific tests in the 
+   <tt>llvm</tt> dejagnu test suite all pass, clang's own test suite passes
+   cleanly, and there are no regressions in the <tt>test-suite</tt>.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="targets">Specific Target 
-Qualification Details</a></div>
-<div class="doc_text">
-  <p><table>
-  <tr><th>Architecture</th><th>OS</th><th>llvm-gcc baseline</th><th>clang baseline
-  </th><th>tests</th></tr>
+<h4><a name="targets">Specific Target Qualification Details</a></h4>
+
+<div>
+
+<table>
+  <tr><th>Architecture</th><th>OS</th><th>llvm-gcc baseline</th><th>clang baseline</th><th>tests</th></tr>
   <tr><td>x86-32</td><td>Linux</td><td>last release</td><td>last release</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
   <tr><td>x86-32</td><td>FreeBSD</td><td>none</td><td>last release</td><td>llvm dejagnu, clang tests, test-suite</td></tr>
   <tr><td>x86-32</td><td>mingw</td><td>last release</td><td>none</td><td>QT</td></tr>
   <tr><td>x86-64</td><td>Mac OS 10.X</td><td>last release</td><td>last release</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
   <tr><td>x86-64</td><td>Linux</td><td>last release</td><td>last release</td><td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
   <tr><td>x86-64</td><td>FreeBSD</td><td>none</td><td>last release</td><td>llvm dejagnu, clang tests, test-suite</td></tr>
-  </table></p>
+</table>
+
+</div>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="commTest">Community Testing</a></div>
-<div class="doc_text">
-  <p>
-  Once all testing has been completed and appropriate bugs filed, the pre-release
-  tar balls may be put on the website and the LLVM community is notified. Ask that
-  all LLVM developers test the release in 2 ways:</p>
-  <ol>
-  <li>Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4 
-  and/or clang binary. Build LLVM.
-  Run "make check" and the full llvm-test suite (make TEST=nightly report).</li>
-  <li>Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 and/or clang source. 
-  Compile everything. Run "make check" and the full llvm-test suite (make TEST=nightly 
-  report).</li>
-  </ol>
-  <p>Ask LLVM developers to submit the report and make check results to the list.
-  Attempt to verify that there are no regressions from the previous release. 
-  The results are not used to qualify a release, but to spot other potential 
-  problems. For unsupported targets, verify that make check at least is 
-  clean.</p>
+<h3><a name="commTest">Community Testing</a></h3>
+<div>
+
+<p>Once all testing has been completed and appropriate bugs filed, the release
+   candidate tarballs are put on the website and the LLVM community is
+   notified. Ask that all LLVM developers test the release in 2 ways:</p>
+
+<ol>
+  <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
+      appropriate <tt>llvm-gcc</tt> and/or <tt>clang</tt> binary. Build
+      LLVM. Run <tt>make check</tt> and the full LLVM test suite (<tt>make
+      TEST=nightly report</tt>).</li>
+
+  <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
+      <tt>llvm-gcc</tt> and/or <tt>clang</tt> source. Compile everything. Run
+      <tt>make check</tt> and the full LLVM test suite (<tt>make TEST=nightly
+      report</tt>).</li>
+</ol>
+
+<p>Ask LLVM developers to submit the test suite report and <tt>make check</tt>
+   results to the list. Verify that there are no regressions from the previous
+   release. The results are not used to qualify a release, but to spot other
+   potential problems. For unsupported targets, verify that <tt>make check</tt>
+   is at least clean.</p>
   
-  <p>During the first round of testing time,
-  all regressions must be fixed before the second pre-release is created.</p>
+<p>During the first round of testing, all regressions must be fixed before the
+   second release candidate is tagged.</p>
   
-  <p>If this is the second round of testing, this is only to ensure the bug 
-  fixes previously merged in have not created new major problems. This is not 
-  the time to solve additional and unrelated bugs. If no patches are merged in, 
-  the release is determined to be ready and the release manager may move onto 
-  the next step.
-  </p>
+<p>If this is the second round of testing, the testing is only to ensure that
+   bug fixes previously merged in have not created new major problems. <i>This
+   is not the time to solve additional and unrelated bugs!</i> If no patches are
+   merged in, the release is determined to be ready and the release manager may
+   move onto the next stage.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="release-patch">Release Patch Rules 
-</a></div>
-<div class="doc_text">
-  <p>
-  Below are the rules regarding patching the release branch.</p>
-  <p>
-  <li>Patches applied to the release branch are only applied by the release 
-  manager.</li>
-  <li>During the first round of testing, patches that fix regressions or that
-  are small and relatively risk free (verified by the appropriate code owner)
-  are applied to the branch. Code owners are asked to be very conservative in 
-  approving patches for the branch and we reserve the right to reject any patch 
-  that does not fix a regression as previously defined.</li>
-  <li>During the remaining rounds of testing, only patches that fix regressions
-  may be applied.</li>
-  
-  </p>
-</div>
+<h3><a name="release-patch">Release Patch Rules</a></h3>
 
+<div>
+
+<p>Below are the rules regarding patching the release branch:</p>
+
+<ol>
+  <li><p>Patches applied to the release branch may only be applied by the
+      release manager.</p></li>
+
+  <li><p>During the first round of testing, patches that fix regressions or that
+      are small and relatively risk free (verified by the appropriate code
+      owner) are applied to the branch. Code owners are asked to be very
+      conservative in approving patches for the branch. We reserve the right to
+      reject any patch that does not fix a regression as previously
+      defined.</p></li>
+
+  <li><p>During the remaining rounds of testing, only patches that fix critical
+      regressions may be applied.</p></li>
+</ol>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="release-final">Release Final Tasks 
-</a></div>
-<div class="doc_text">
-  <p>
-  The final stages of the release process involving tagging the release branch,
-  updating documentation that refers to the release, and updating the demo
-  page.</p>
-  <p>FIXME: Add a note if anything needs to be done to the clang website. 
-  Eventually the websites will be merged hopefully.</p>
 </div>
 
+<!-- ======================================================================= -->
+<h3><a name="release-final">Release Final Tasks</a></h3>
+
+<div>
+
+<p>The final stages of the release process involves tagging the "final" release
+   branch, updating documentation that refers to the release, and updating the
+   demo page.</p>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="updocs">Update Documentation</a></div>
-<div class="doc_text">
-  <p>
-  Review the documentation and ensure that it is up to date.  The Release Notes
-  must be updated to reflect bug fixes, new known issues, and changes in the
-  list of supported platforms.  The Getting Started Guide should be updated to
-  reflect the new release version number tag avaiable from Subversion and
-  changes in basic system requirements. Merge both changes from mainline into 
-  the release branch.
-  </p>
+<h4><a name="updocs">Update Documentation</a></h4>
+
+<div>
+
+<p>Review the documentation and ensure that it is up to date. The "Release
+   Notes" must be updated to reflect new features, bug fixes, new known issues,
+   and changes in the list of supported platforms. The "Getting Started Guide"
+   should be updated to reflect the new release version number tag avaiable from
+   Subversion and changes in basic system requirements. Merge both changes from
+   mainline into the release branch.</p>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="tag">Tag the Release Branch</a></div>
-<div class="doc_text">
-  <p>Tag the release branch using the following procedure:</p>
+<h4><a name="tag">Tag the LLVM Final Release</a></h4>
+
+<div>
+
+<p>Tag the final release sources using the following procedure:</p>
+
 <div class="doc_code">
 <pre>
-svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
-         https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
-         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XX</i>
-svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
-         https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XX</i>
+$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_<i>XY</i>/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/Final
 </pre>
 </div>
+
 </div>
 
+</div>
+
+<!-- ======================================================================= -->
+<h3><a name="updemo">Update the LLVM Demo Page</a></h3>
 
+<div>
+
+<p>The LLVM demo page must be updated to use the new release. This consists of
+   using the new <tt>llvm-gcc</tt> binary and building LLVM.</p>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="updemo">Update the LLVM Demo Page</a></div>
-<div class="doc_text">
-  <p>
-  The LLVM demo page must be updated to use the new release. This consists of
-  using the llvm-gcc binary and building LLVM. Update the website demo page
-  configuration to use the new release.</p>
+<h4><a name="webupdates">Update the LLVM Website</a></h4>
+
+<div>
+
+<p>The website must be updated before the release announcement is sent out. Here
+   is what to do:</p>
+
+<ol>
+  <li>Check out the <tt>www</tt> module from Subversion.</li>
+
+  <li>Create a new subdirectory <tt>X.Y</tt> in the releases directory.</li>
+
+  <li>Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>llvm-gcc</tt> source,
+      <tt>clang source</tt>, <tt>clang binaries</tt>, and <tt>llvm-gcc</tt>
+      binaries in this new directory.</li>
+
+  <li>Copy and commit the <tt>llvm/docs</tt> and <tt>LICENSE.txt</tt> files
+      into this new directory. The docs should be built with
+      <tt>BUILD_FOR_WEBSITE=1</tt>.</li>
+
+  <li>Commit the <tt>index.html</tt> to the <tt>release/X.Y</tt> directory to
+      redirect (use from previous release.</li>
+
+  <li>Update the <tt>releases/download.html</tt> file with the new release.</li>
+
+  <li>Update the <tt>releases/index.html</tt> with the new release and link to
+      release documentation.</li>
+
+  <li>Finally, update the main page (<tt>index.html</tt> and sidebar) to point
+      to the new release and release announcement. Make sure this all gets
+      committed back into Subversion.</li>
+</ol>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="webupdates">Update the LLVM Website</a></div>
-<div class="doc_text">
-  <p>
-  The website must be updated before the release announcement is sent out. Here is
-  what to do:</p>
-  <ol>
-  <li> Check out the <tt>website</tt> module from CVS. </li> 
-  <li> Create a new subdirectory X.X in the releases directory. </li> 
-  <li> Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>llvm-gcc</tt> source,
-  <tt>clang source</tt>, <tt>clang binaries</tt>, 
-  and <tt>llvm-gcc</tt> binaries in this new directory. </li>
-  <li> Copy and commit the <tt>llvm/docs</tt> and <tt>LICENSE.txt</tt>
-  files into this new directory. The docs should be built with BUILD_FOR_WEBSITE=1.</li>
-  <li> Commit the index.html to the release/X.X directory to redirect (use from previous
-  release. </li>
-  <li> Update the <tt>releases/download.html</tt> file with the new release. </li>
-  <li>Update the <tt>releases/index.html</tt> with the new release and link to 
-  release documentation.</li>
-  <li> Finally, update the main page (<tt>index.html</tt> and sidebar) to
-  point to the new release and release announcement. Make sure this all gets
-  committed back into Subversion.</li>
-  </ol>
+<h4><a name="announce">Announce the Release</a></h4>
+
+<div>
+
+<p>Have Chris send out the release announcement when everything is finished.</p>
+
+</div>
+
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="announce">Announce the Release</a></div>
-<div class="doc_text">
-  <p>Have Chris send out the release announcement when everything is finished.</p>
 </div>
 
 <!-- *********************************************************************** -->
@@ -515,9 +617,9 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
   src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-  <a href="http://llvm.cs.uiuc.edu">The LLVM Compiler Infrastructure</a>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-07-07 09:48:00 +0200 (Wed, 07 Jul 2010) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
index 64c6141..54f548c 100644
--- a/docs/HowToSubmitABug.html
+++ b/docs/HowToSubmitABug.html
@@ -7,9 +7,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   How to submit an LLVM bug report
-</div>
+</h1>
 
 <table class="layout" style="width: 90%" >
 <tr class="layout">
@@ -37,12 +37,12 @@
 </table>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction - Got bugs?</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you're working with LLVM and run into a bug, we definitely want to know
 about it.  This document describes what you can do to increase the odds of
@@ -76,12 +76,12 @@ information:</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="crashers">Crashing Bugs</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>More often than not, bugs in the compiler cause it to crash&mdash;often due
 to an assertion failure of some sort. The most important
@@ -109,14 +109,12 @@ with the following extra command line options:</p>
 
 </ul>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="front-end">Front-end bugs</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If the problem is in the front-end, you should re-run the same
 <tt>llvm-gcc</tt> command that resulted in the crash, but add the
@@ -137,11 +135,11 @@ has instructions on the best way to use delta.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ct_optimizer">Compile-time optimization bugs</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If you find that a bug crashes in the optimizer, compile your test-case to a
 <tt>.bc</tt> file by passing "<tt><b>-emit-llvm -O0 -c -o foo.bc</b></tt>".
@@ -171,11 +169,11 @@ that bugpoint emits.  If something goes wrong with bugpoint, please submit the
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ct_codegen">Code generator bugs</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If you find a bug that crashes llvm-gcc in the code generator, compile your
 source file to a .bc file by passing "<tt><b>-emit-llvm -c -o foo.bc</b></tt>"
@@ -207,13 +205,15 @@ that bugpoint emits.  If something goes wrong with bugpoint, please submit the
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="miscompilations">Miscompilations</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If llvm-gcc successfully produces an executable, but that executable doesn't
 run right, this is either a bug in the code or a bug in the
@@ -241,12 +241,12 @@ error.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="codegen">Incorrect code generation</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Similarly to debugging incorrect compilation by mis-behaving passes, you can
 debug incorrect code generation by either LLC or the JIT, using
@@ -338,9 +338,9 @@ the following:</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 580ae79..0e37e82 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title"> LLVM Language Reference Manual </div>
+<h1>LLVM Language Reference Manual</h1>
 <ol>
   <li><a href="#abstract">Abstract</a></li>
   <li><a href="#introduction">Introduction</a></li>
@@ -321,10 +321,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="abstract">Abstract </a></div>
+<h2><a name="abstract">Abstract</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is a reference manual for the LLVM assembly language. LLVM is
    a Static Single Assignment (SSA) based representation that provides type
@@ -335,10 +335,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="introduction">Introduction</a> </div>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM code representation is designed to be used in three different forms:
    as an in-memory compiler IR, as an on-disk bitcode representation (suitable
@@ -359,12 +359,12 @@
    variable is never accessed outside of the current function, allowing it to
    be promoted to a simple SSA value instead of a memory location.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="wellformed">Well-Formedness</a> </div>
+<h4>
+  <a name="wellformed">Well-Formedness</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>It is important to note that this document describes 'well formed' LLVM
    assembly language.  There is a difference between what the parser accepts and
@@ -384,13 +384,15 @@
 
 </div>
 
+</div>
+
 <!-- Describe the typesetting conventions here. -->
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="identifiers">Identifiers</a> </div>
+<h2><a name="identifiers">Identifiers</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM identifiers come in two basic types: global and local. Global
    identifiers (functions, global variables) begin with the <tt>'@'</tt>
@@ -475,14 +477,15 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="highlevel">High Level Structure</a> </div>
+<h2><a name="highlevel">High Level Structure</a></h2>
 <!-- *********************************************************************** -->
-
+<div>
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="modulestructure">Module Structure</a>
-</div>
+<h3>
+  <a name="modulestructure">Module Structure</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM programs are composed of "Module"s, each of which is a translation unit
    of the input programs.  Each module consists of functions, global variables,
@@ -528,11 +531,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="linkage">Linkage Types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>All Global Variables and Functions have one of the following types of
    linkage:</p>
@@ -677,11 +680,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="callingconv">Calling Conventions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM <a href="#functionstructure">functions</a>, <a href="#i_call">calls</a>
    and <a href="#i_invoke">invokes</a> can all have an optional calling
@@ -750,11 +753,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="visibility">Visibility Styles</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>All Global Variables and Functions have one of the following visibility
    styles:</p>
@@ -784,11 +787,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="namedtypes">Named Types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM IR allows you to specify name aliases for certain types.  This can make
    it easier to read the IR and make the IR more condensed (particularly when
@@ -815,11 +818,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="globalvars">Global Variables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Global variables define regions of memory allocated at compilation time
    instead of run-time.  Global variables may optionally be initialized, may
@@ -883,11 +886,11 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="functionstructure">Functions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
    optional <a href="#linkage">linkage type</a>, an optional
@@ -946,11 +949,11 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="aliasstructure">Aliases</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Aliases act as "second name" for the aliasee value (which can be either
    function, global variable, another alias or bitcast of global value). Aliases
@@ -965,11 +968,11 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="namedmetadatastructure">Named Metadata</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Named metadata is a collection of metadata. <a href="#metadata">Metadata
    nodes</a> (but not metadata strings) are the only valid operands for
@@ -988,9 +991,11 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="paramattrs">Parameter Attributes</a></div>
+<h3>
+  <a name="paramattrs">Parameter Attributes</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The return type and each parameter of a function type may have a set of
    <i>parameter attributes</i> associated with them. Parameter attributes are
@@ -1017,13 +1022,15 @@ declare signext i8 @returns_signed_char()
 <dl>
   <dt><tt><b>zeroext</b></tt></dt>
   <dd>This indicates to the code generator that the parameter or return value
-      should be zero-extended to a 32-bit value by the caller (for a parameter)
-      or the callee (for a return value).</dd>
+      should be zero-extended to the extent required by the target's ABI (which
+      is usually 32-bits, but is 8-bits for a i1 on x86-64) by the caller (for a
+      parameter) or the callee (for a return value).</dd>
 
   <dt><tt><b>signext</b></tt></dt>
   <dd>This indicates to the code generator that the parameter or return value
-      should be sign-extended to a 32-bit value by the caller (for a parameter)
-      or the callee (for a return value).</dd>
+      should be sign-extended to the extent required by the target's ABI (which
+      is usually 32-bits) by the caller (for a parameter) or the callee (for a
+      return value).</dd>
 
   <dt><tt><b>inreg</b></tt></dt>
   <dd>This indicates that this parameter or return value should be treated in a
@@ -1095,11 +1102,11 @@ declare signext i8 @returns_signed_char()
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="gc">Garbage Collector Names</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Each function may specify a garbage collector name, which is simply a
    string:</p>
@@ -1115,11 +1122,11 @@ define void @f() gc "name" { ... }
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="fnattrs">Function Attributes</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Function attributes are set to communicate additional information about a
    function. Function attributes are considered to be part of the function, not
@@ -1238,11 +1245,11 @@ define void @f() optsize { ... }
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="moduleasm">Module-Level Inline Assembly</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Modules may contain "module-level inline asm" blocks, which corresponds to
    the GCC "file scope inline asm" blocks.  These blocks are internally
@@ -1264,11 +1271,11 @@ module asm "more can go here"
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="datalayout">Data Layout</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>A module may specify a target specific data layout string that specifies how
    data is to be laid out in memory. The syntax for the data layout is
@@ -1376,11 +1383,11 @@ target datalayout = "<i>layout specification</i>"
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="pointeraliasing">Pointer Aliasing Rules</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Any memory access must be done through a pointer value associated
 with an address range of the memory access, otherwise the behavior
@@ -1440,11 +1447,11 @@ to implement type-based alias analysis.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="volatile">Volatile Memory Accesses</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Certain memory accesses, such as <a href="#i_load"><tt>load</tt></a>s, <a
 href="#i_store"><tt>store</tt></a>s, and <a
@@ -1457,11 +1464,13 @@ synchronization behavior.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="typesystem">Type System</a> </div>
+<h2><a name="typesystem">Type System</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM type system is one of the most important features of the
    intermediate representation.  Being typed enables a number of optimizations
@@ -1471,13 +1480,12 @@ synchronization behavior.</p>
    and transformations that are not feasible to perform on normal three address
    code representations.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="t_classifications">Type
-Classifications</a> </div>
+<h3>
+  <a name="t_classifications">Type Classifications</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The types fall into a few useful classifications:</p>
 
@@ -1534,19 +1542,21 @@ Classifications</a> </div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="t_primitive">Primitive Types</a> </div>
+<h3>
+  <a name="t_primitive">Primitive Types</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The primitive types are the fundamental building blocks of the LLVM
    system.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_integer">Integer Type</a> </div>
+<h4>
+  <a name="t_integer">Integer Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The integer type is a very simple type that simply specifies an arbitrary
@@ -1580,9 +1590,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_floating">Floating Point Types</a> </div>
+<h4>
+  <a name="t_floating">Floating Point Types</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <table>
   <tbody>
@@ -1598,9 +1610,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_x86mmx">X86mmx Type</a> </div>
+<h4>
+  <a name="t_x86mmx">X86mmx Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The x86mmx type represents a value held in an MMX register on an x86 machine.  The operations allowed on it are quite limited:  parameters and return values, load and store, and bitcast.  User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type.  There are no arrays, vectors or constants of this type.</p>
@@ -1613,9 +1627,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_void">Void Type</a> </div>
+<h4>
+  <a name="t_void">Void Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The void type does not represent any value and has no size.</p>
@@ -1628,9 +1644,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_label">Label Type</a> </div>
+<h4>
+  <a name="t_label">Label Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The label type represents code labels.</p>
@@ -1643,9 +1661,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_metadata">Metadata Type</a> </div>
+<h4>
+  <a name="t_metadata">Metadata Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The metadata type represents embedded metadata. No derived types may be
@@ -1659,11 +1679,14 @@ Classifications</a> </div>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="t_derived">Derived Types</a> </div>
+<h3>
+  <a name="t_derived">Derived Types</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The real power in LLVM comes from the derived types in the system.  This is
    what allows a programmer to represent arrays, functions, pointers, and other
@@ -1673,12 +1696,12 @@ Classifications</a> </div>
    of another array.</p>
 
    
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_aggregate">Aggregate Types</a> </div>
+<h4>
+  <a name="t_aggregate">Aggregate Types</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Aggregate Types are a subset of derived types that can contain multiple
   member types. <a href="#t_array">Arrays</a>,
@@ -1688,9 +1711,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_array">Array Type</a> </div>
+<h4>
+  <a name="t_array">Array Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The array type is a very simple derived type that arranges elements
@@ -1746,9 +1771,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_function">Function Type</a> </div>
+<h4>
+  <a name="t_function">Function Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The function type can be thought of as a function signature.  It consists of
@@ -1799,9 +1826,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_struct">Structure Type</a> </div>
+<h4>
+  <a name="t_struct">Structure Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The structure type is used to represent a collection of data members together
@@ -1837,10 +1866,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_pstruct">Packed Structure Type</a>
-</div>
+<h4>
+  <a name="t_pstruct">Packed Structure Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The packed structure type is used to represent a collection of data members
@@ -1875,9 +1905,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
+<h4>
+  <a name="t_pointer">Pointer Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>The pointer type is used to specify memory locations.
@@ -1919,9 +1951,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_vector">Vector Type</a> </div>
+<h4>
+  <a name="t_vector">Vector Type</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>A vector type is a simple derived type that represents a vector of elements.
@@ -1958,8 +1992,11 @@ Classifications</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_opaque">Opaque Type</a> </div>
-<div class="doc_text">
+<h4>
+  <a name="t_opaque">Opaque Type</a>
+</h4>
+
+<div>
 
 <h5>Overview:</h5>
 <p>Opaque types are used to represent unknown types in the system.  This
@@ -1982,12 +2019,14 @@ Classifications</a> </div>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="t_uprefs">Type Up-references</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <h5>Overview:</h5>
 <p>An "up reference" allows you to refer to a lexically enclosing type without
@@ -2030,21 +2069,23 @@ Classifications</a> </div>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="constants">Constants</a> </div>
+<h2><a name="constants">Constants</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM has several different basic types of constants.  This section describes
    them all and their syntax.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="simpleconstants">Simple Constants</a></div>
+<h3>
+  <a name="simpleconstants">Simple Constants</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt><b>Boolean constants</b></dt>
@@ -2097,12 +2138,12 @@ Classifications</a> </div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
 <a name="aggregateconstants"></a> <!-- old anchor -->
 <a name="complexconstants">Complex Constants</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Complex constants are a (potentially recursive) combination of simple
    constants and smaller complex constants.</p>
@@ -2152,11 +2193,11 @@ Classifications</a> </div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="globalconstants">Global Variable and Function Addresses</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The addresses of <a href="#globalvars">global variables</a>
    and <a href="#functionstructure">functions</a> are always implicitly valid
@@ -2174,8 +2215,11 @@ Classifications</a> </div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="undefvalues">Undefined Values</a></div>
-<div class="doc_text">
+<h3>
+  <a name="undefvalues">Undefined Values</a>
+</h3>
+
+<div>
 
 <p>The string '<tt>undef</tt>' can be used anywhere a constant is expected, and
    indicates that the user of the value may receive an unspecified bit-pattern.
@@ -2314,8 +2358,11 @@ b: unreachable
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="trapvalues">Trap Values</a></div>
-<div class="doc_text">
+<h3>
+  <a name="trapvalues">Trap Values</a>
+</h3>
+
+<div>
 
 <p>Trap values are similar to <a href="#undefvalues">undef values</a>, however
    instead of representing an unspecified bit pattern, they represent the
@@ -2367,7 +2414,12 @@ b: unreachable
     <a href="#terminators">terminator instruction</a>
     if the terminator instruction has multiple successors and the instruction
     is always executed when control transfers to one of the successors, and
-    may not be executed when control is transfered to another.</li>
+    may not be executed when control is transferred to another.</li>
+
+<li>Additionally, an instruction also <i>control-depends</i> on a terminator
+    instruction if the set of instructions it otherwise depends on would be
+    different if the terminator had transferred control to a different
+    successor.</li>
 
 <li>Dependence is transitive.</li>
 
@@ -2411,17 +2463,34 @@ end:
                                      ; control-dependent on %cmp, so this
                                      ; always results in a trap value.
 
-  volatile store i32 0, i32* @g      ; %end is control-equivalent to %entry
-                                     ; so this is defined (ignoring earlier
+  volatile store i32 0, i32* @g      ; This would depend on the store in %true
+                                     ; if %cmp is true, or the store in %entry
+                                     ; otherwise, so this is undefined behavior.
+
+  %br i1 %cmp, %second_true, %second_end
+                                     ; The same branch again, but this time the
+                                     ; true block doesn't have side effects.
+
+second_true:
+  ; No side effects!
+  br label %end
+
+second_end:
+  volatile store i32 0, i32* @g      ; This time, the instruction always depends
+                                     ; on the store in %end. Also, it is
+                                     ; control-equivalent to %end, so this is
+                                     ; well- defined (again, ignoring earlier
                                      ; undefined behavior in this example).
 </pre>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="blockaddress">Addresses of Basic
-    Blocks</a></div>
-<div class="doc_text">
+<h3>
+  <a name="blockaddress">Addresses of Basic Blocks</a>
+</h3>
+
+<div>
 
 <p><b><tt>blockaddress(@function, %block)</tt></b></p>
 
@@ -2446,10 +2515,11 @@ end:
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="constantexprs">Constant Expressions</a>
-</div>
+<h3>
+  <a name="constantexprs">Constant Expressions</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Constant expressions are used to allow expressions involving other constants
    to be used as constants.  Constant expressions may be of
@@ -2575,16 +2645,18 @@ end:
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="othervalues">Other Values</a> </div>
+<h2><a name="othervalues">Other Values</a></h2>
 <!-- *********************************************************************** -->
-
+<div>
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
 <a name="inlineasm">Inline Assembler Expressions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM supports inline assembler expressions (as opposed
    to <a href="#moduleasm"> Module-Level Inline Assembly</a>) through the use of
@@ -2633,13 +2705,12 @@ call void asm alignstack "eieio", ""()
    documented here.  Constraints on what can be done (e.g. duplication, moving,
    etc need to be documented).  This is probably best done by reference to
    another document that covers inline asm from a holistic perspective.</p>
-</div>
 
-<div class="doc_subsubsection">
+<h4>
 <a name="inlineasm_md">Inline Asm Metadata</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
    attached to it that contains a list of constant integers.  If present, the
@@ -2660,12 +2731,14 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="metadata">Metadata Nodes and Metadata
-  Strings</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="metadata">Metadata Nodes and Metadata Strings</a>
+</h3>
+
+<div>
 
 <p>LLVM IR allows metadata to be attached to instructions in the program that
    can convey extra information about the code to the optimizers and code
@@ -2690,25 +2763,31 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
 <p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt> 
    function is using two metadata arguments.</p>
 
-     <pre class="doc_code">
-       call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
-     </pre>
+<div class="doc_code">
+<pre>
+call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+</pre>
+</div>
 
 <p>Metadata can be attached with an instruction. Here metadata <tt>!21</tt> is
    attached with <tt>add</tt> instruction using <tt>!dbg</tt> identifier.</p>
 
-    <pre class="doc_code">
-      %indvar.next = add i64 %indvar, 1, !dbg !21
-    </pre>
+<div class="doc_code">
+<pre>
+%indvar.next = add i64 %indvar, 1, !dbg !21
+</pre>
+</div>
+
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="intrinsic_globals">Intrinsic Global Variables</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
-
+<div>
 <p>LLVM has a number of "magic" global variables that contain data that affect
 code generation or other IR semantics.  These are documented here.  All globals
 of this sort should have a section specified as "<tt>llvm.metadata</tt>".  This
@@ -2716,11 +2795,11 @@ section and all globals that start with "<tt>llvm.</tt>" are reserved for use
 by LLVM.</p>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
 <a name="intg_used">The '<tt>llvm.used</tt>' Global Variable</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>@llvm.used</tt> global is an array with i8* element type which has <a
 href="#linkage_appending">appending linkage</a>.  This array contains a list of
@@ -2751,11 +2830,13 @@ object file to prevent the assembler and linker from molesting the symbol.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
-<a name="intg_compiler_used">The '<tt>llvm.compiler.used</tt>' Global Variable</a>
-</div>
+<h3>
+  <a name="intg_compiler_used">
+    The '<tt>llvm.compiler.used</tt>' Global Variable
+  </a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>@llvm.compiler.used</tt> directive is the same as the
 <tt>@llvm.used</tt> directive, except that it only prevents the compiler from
@@ -2769,11 +2850,11 @@ should not be exposed to source languages.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
 <a name="intg_global_ctors">The '<tt>llvm.global_ctors</tt>' Global Variable</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <pre>
 %0 = type { i32, void ()* }
 @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
@@ -2784,11 +2865,11 @@ should not be exposed to source languages.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
 <a name="intg_global_dtors">The '<tt>llvm.global_dtors</tt>' Global Variable</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <pre>
 %0 = type { i32, void ()* }
 @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
@@ -2799,12 +2880,13 @@ should not be exposed to source languages.</p>
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="instref">Instruction Reference</a> </div>
+<h2><a name="instref">Instruction Reference</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM instruction set consists of several different classifications of
    instructions: <a href="#terminators">terminator
@@ -2813,13 +2895,12 @@ should not be exposed to source languages.</p>
    <a href="#memoryops">memory instructions</a>, and
    <a href="#otherops">other instructions</a>.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="terminators">Terminator
-Instructions</a> </div>
+<h3>
+  <a name="terminators">Terminator Instructions</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>As mentioned <a href="#functionstructure">previously</a>, every basic block
    in a program ends with a "Terminator" instruction, which indicates which
@@ -2837,13 +2918,12 @@ Instructions</a> </div>
    '<a href="#i_unwind"><tt>unwind</tt></a>' instruction, and the
    '<a href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_ret">'<tt>ret</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_ret">'<tt>ret</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -2889,9 +2969,11 @@ Instruction</a> </div>
 
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_br">'<tt>br</tt>' Instruction</a> </div>
+<h4>
+  <a name="i_br">'<tt>br</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -2930,11 +3012,11 @@ IfUnequal:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_switch">'<tt>switch</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -2985,11 +3067,11 @@ IfUnequal:
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_indirectbr">'<tt>indirectbr</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3033,11 +3115,11 @@ IfUnequal:
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_invoke">'<tt>invoke</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3123,10 +3205,11 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
 
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_subsubsection"> <a name="i_unwind">'<tt>unwind</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_unwind">'<tt>unwind</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3154,10 +3237,11 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
 
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_subsubsection"> <a name="i_unreachable">'<tt>unreachable</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_unreachable">'<tt>unreachable</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3175,10 +3259,14 @@ Instruction</a> </div>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="binaryops">Binary Operations</a> </div>
+<h3>
+  <a name="binaryops">Binary Operations</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Binary operators are used to do most of the computation in a program.  They
    require two operands of the same type, execute an operation on them, and
@@ -3188,14 +3276,12 @@ Instruction</a> </div>
 
 <p>There are several different binary operators:</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_add">'<tt>add</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3236,11 +3322,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3266,11 +3352,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_sub">'<tt>sub</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3318,11 +3404,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_fsub">'<tt>fsub</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3354,11 +3440,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_mul">'<tt>mul</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3404,11 +3490,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_fmul">'<tt>fmul</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3434,10 +3520,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_udiv">'<tt>udiv</tt>' Instruction
-</a></div>
+<h4>
+  <a name="i_udiv">'<tt>udiv</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3474,10 +3561,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_sdiv">'<tt>sdiv</tt>' Instruction
-</a> </div>
+<h4>
+  <a name="i_sdiv">'<tt>sdiv</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3516,10 +3604,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_fdiv">'<tt>fdiv</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_fdiv">'<tt>fdiv</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3545,10 +3634,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_urem">'<tt>urem</tt>' Instruction</a>
-</div>
+<h4>
+  <a name="i_urem">'<tt>urem</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3582,11 +3672,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_srem">'<tt>srem</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3606,9 +3696,10 @@ Instruction</a> </div>
 
 <h5>Semantics:</h5>
 <p>This instruction returns the <i>remainder</i> of a division (where the result
-   has the same sign as the dividend, <tt>op1</tt>), not the <i>modulo</i>
-   operator (where the result has the same sign as the divisor, <tt>op2</tt>) of
-   a value.  For more information about the difference,
+   is either zero or has the same sign as the dividend, <tt>op1</tt>), not the
+   <i>modulo</i> operator (where the result is either zero or has the same sign
+   as the divisor, <tt>op2</tt>) of a value.
+   For more information about the difference,
    see <a href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
    Math Forum</a>. For a table of how this is implemented in various languages,
    please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
@@ -3632,10 +3723,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="i_frem">'<tt>frem</tt>' Instruction</a> </div>
+<h4>
+  <a name="i_frem">'<tt>frem</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3662,11 +3754,14 @@ Instruction</a> </div>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="bitwiseops">Bitwise Binary
-Operations</a> </div>
+<h3>
+  <a name="bitwiseops">Bitwise Binary Operations</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Bitwise binary operators are used to do various forms of bit-twiddling in a
    program.  They are generally very efficient instructions and can commonly be
@@ -3674,13 +3769,12 @@ Operations</a> </div>
    same type, execute an operation on them, and produce a single value.  The
    resulting value is the same type as its operands.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_shl">'<tt>shl</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_shl">'<tt>shl</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3727,10 +3821,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_lshr">'<tt>lshr</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_lshr">'<tt>lshr</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3773,9 +3868,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_ashr">'<tt>ashr</tt>'
-Instruction</a> </div>
-<div class="doc_text">
+<h4>
+  <a name="i_ashr">'<tt>ashr</tt>' Instruction</a>
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3818,10 +3915,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_and">'<tt>and</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_and">'<tt>and</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3878,9 +3976,11 @@ Instruction</a> </div>
 </pre>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_or">'<tt>or</tt>' Instruction</a> </div>
+<h4>
+  <a name="i_or">'<tt>or</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -3939,10 +4039,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_xor">'<tt>xor</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_xor">'<tt>xor</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4002,12 +4103,14 @@ Instruction</a> </div>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="vectorops">Vector Operations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM supports several instructions to represent vector operations in a
    target-independent manner.  These instructions cover the element-access and
@@ -4016,14 +4119,12 @@ Instruction</a> </div>
    will want to use target-specific intrinsics to take full advantage of a
    specific target.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_extractelement">'<tt>extractelement</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4055,11 +4156,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_insertelement">'<tt>insertelement</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4091,11 +4192,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_shufflevector">'<tt>shufflevector</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4138,24 +4239,24 @@ Instruction</a> </div>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="aggregateops">Aggregate Operations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM supports several instructions for working with
   <a href="#t_aggregate">aggregate</a> values.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_extractvalue">'<tt>extractvalue</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4193,11 +4294,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_insertvalue">'<tt>insertvalue</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4231,27 +4332,26 @@ Instruction</a> </div>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="memoryops">Memory Access and Addressing Operations</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>A key design point of an SSA-based representation is how it represents
    memory.  In LLVM, no memory locations are in SSA form, which makes things
    very simple.  This section describes how to read, write, and allocate
    memory in LLVM.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_alloca">'<tt>alloca</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4298,10 +4398,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_load">'<tt>load</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_load">'<tt>load</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4356,10 +4457,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="i_store">'<tt>store</tt>'
-Instruction</a> </div>
+<h4>
+  <a name="i_store">'<tt>store</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4417,11 +4519,11 @@ Instruction</a> </div>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4547,23 +4649,25 @@ entry:
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="convertops">Conversion Operations</a>
 </div>
 
-<div class="doc_text">
+<!-- ======================================================================= -->
+<h3>
+  <a name="convertops">Conversion Operations</a>
+</h3>
+
+<div>
 
 <p>The instructions in this category are the conversion instructions (casting)
    which all take a single operand and a type. They perform various bit
    conversions on the operand.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_trunc">'<tt>trunc .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4599,10 +4703,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_zext">'<tt>zext .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4638,10 +4743,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_sext">'<tt>sext .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4676,11 +4782,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4714,10 +4820,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_fpext">'<tt>fpext .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4743,17 +4850,18 @@ entry:
 
 <h5>Example:</h5>
 <pre>
-  %X = fpext float 3.1415 to double        <i>; yields double:3.1415</i>
-  %Y = fpext float 1.0 to float            <i>; yields float:1.0 (no-op)</i>
+  %X = fpext float 3.125 to double         <i>; yields double:3.125000e+00</i>
+  %Y = fpext double %X to fp128            <i>; yields fp128:0xL00000000000000004000900000000000</i>
 </pre>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4787,10 +4895,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4825,10 +4934,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4861,10 +4971,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4896,10 +5007,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4933,10 +5045,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -4970,10 +5083,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5012,21 +5126,24 @@ entry:
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"> <a name="otherops">Other Operations</a> </div>
+<h3>
+  <a name="otherops">Other Operations</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The instructions in this category are the "miscellaneous" instructions, which
    defy better classification.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
-</div>
+<h4>
+  <a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5125,10 +5242,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
-</div>
+<h4>
+  <a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5245,11 +5363,11 @@ entry:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_phi">'<tt>phi</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5293,11 +5411,11 @@ Loop:       ; Infinite loop that counts from 0 on up...
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
    <a name="i_select">'<tt>select</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5336,11 +5454,11 @@ Loop:       ; Infinite loop that counts from 0 on up...
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_call">'<tt>call</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5445,11 +5563,11 @@ freestanding environments and non-C-based languages.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="i_va_arg">'<tt>va_arg</tt>' Instruction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5490,11 +5608,15 @@ freestanding environments and non-C-based languages.</p>
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"> <a name="intrinsics">Intrinsic Functions</a> </div>
+<h2><a name="intrinsics">Intrinsic Functions</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM supports the notion of an "intrinsic function".  These functions have
    well known names and semantics and are required to follow certain
@@ -5537,14 +5659,12 @@ freestanding environments and non-C-based languages.</p>
 <p>To learn how to add an intrinsic function, please see the
    <a href="ExtendingLLVM.html">Extending LLVM Guide</a>.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_varargs">Variable Argument Handling Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Variable argument support is defined in LLVM with
    the <a href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
@@ -5586,15 +5706,13 @@ declare void @llvm.va_copy(i8*, i8*)
 declare void @llvm.va_end(i8*)
 </pre>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a>
-</div>
+</h4>
 
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5620,11 +5738,11 @@ declare void @llvm.va_end(i8*)
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
  <a name="int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5651,11 +5769,11 @@ declare void @llvm.va_end(i8*)
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5681,12 +5799,14 @@ declare void @llvm.va_end(i8*)
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_gc">Accurate Garbage Collection Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM support for <a href="GarbageCollection.html">Accurate Garbage
 Collection</a> (GC) requires the implementation and generation of these
@@ -5701,14 +5821,12 @@ LLVM</a>.</p>
 <p>The garbage collection intrinsics only operate on objects in the generic
    address space (address space zero).</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5735,11 +5853,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5767,11 +5885,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5798,24 +5916,24 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_codegen">Code Generator Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>These intrinsics are provided by LLVM to expose special features that may
    only be implemented with code generator support.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5846,11 +5964,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5880,11 +5998,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5910,11 +6028,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5935,11 +6053,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5968,11 +6086,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -5999,11 +6117,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -6025,26 +6143,26 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_libc">Standard C Library Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM provides intrinsics for a few important standard C library functions.
    These intrinsics allow source-language front-ends to pass information about
    the alignment of the pointer arguments to the code generator, providing
    opportunity for more efficient code generation.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_memcpy">'<tt>llvm.memcpy</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
@@ -6094,11 +6212,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_memmove">'<tt>llvm.memmove</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
@@ -6150,11 +6268,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_memset">'<tt>llvm.memset.*</tt>' Intrinsics</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
@@ -6200,11 +6318,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any
@@ -6238,11 +6356,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any
@@ -6274,11 +6392,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_sin">'<tt>llvm.sin.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any
@@ -6308,11 +6426,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any
@@ -6342,11 +6460,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any
@@ -6376,24 +6494,24 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_manip">Bit Manipulation Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM provides intrinsics for a few important bit manipulation operations.
    These allow efficient code generation for some algorithms.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic function. You can use bswap on any integer
@@ -6424,11 +6542,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
@@ -6456,11 +6574,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
@@ -6490,11 +6608,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
@@ -6523,23 +6641,25 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_overflow">Arithmetic with Overflow Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM provides intrinsics for some arithmetic with overflow operations.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_sadd_overflow">'<tt>llvm.sadd.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_sadd_overflow">
+    '<tt>llvm.sadd.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.sadd.with.overflow</tt>
@@ -6581,11 +6701,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_uadd_overflow">'<tt>llvm.uadd.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_uadd_overflow">
+    '<tt>llvm.uadd.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.uadd.with.overflow</tt>
@@ -6626,11 +6748,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_ssub_overflow">'<tt>llvm.ssub.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_ssub_overflow">
+    '<tt>llvm.ssub.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.ssub.with.overflow</tt>
@@ -6672,11 +6796,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_usub_overflow">'<tt>llvm.usub.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_usub_overflow">
+    '<tt>llvm.usub.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.usub.with.overflow</tt>
@@ -6718,11 +6844,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_smul_overflow">'<tt>llvm.smul.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_smul_overflow">
+    '<tt>llvm.smul.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.smul.with.overflow</tt>
@@ -6765,11 +6893,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt>' Intrinsics</a>
-</div>
+<h4>
+  <a name="int_umul_overflow">
+    '<tt>llvm.umul.with.overflow.*</tt>' Intrinsics
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.umul.with.overflow</tt>
@@ -6810,12 +6940,14 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_fp16">Half Precision Floating Point Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Half precision floating point is a storage-only format. This means that it is
    a dense encoding (in memory) but does not support computation in the
@@ -6829,14 +6961,15 @@ LLVM</a>.</p>
    float if needed, then converted to i16 with
    <a href="#int_convert_to_fp16"><tt>llvm.convert.to.fp16</tt></a>, then
    storing as an i16 value.</p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a>
-</div>
+<h4>
+  <a name="int_convert_to_fp16">
+    '<tt>llvm.convert.to.fp16</tt>' Intrinsic
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -6867,11 +7000,13 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
- <a name="int_convert_from_fp16">'<tt>llvm.convert.from.fp16</tt>' Intrinsic</a>
-</div>
+<h4>
+  <a name="int_convert_from_fp16">
+    '<tt>llvm.convert.from.fp16</tt>' Intrinsic
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -6901,12 +7036,14 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_debugger">Debugger Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM debugger intrinsics (which all start with <tt>llvm.dbg.</tt>
    prefix), are described in
@@ -6916,11 +7053,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_eh">Exception Handling Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM exception handling intrinsics (which all start with
    <tt>llvm.eh.</tt> prefix), are described in
@@ -6930,11 +7067,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_trampoline">Trampoline Intrinsic</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This intrinsic makes it possible to excise one parameter, marked with
    the <a href="#nest"><tt>nest</tt></a> attribute, from a function.
@@ -6960,14 +7097,14 @@ LLVM</a>.</p>
 <p>The call <tt>%val = call i32 %fp(i32 %x, i32 %y)</tt> is then equivalent
    to <tt>%val = call i32 %f(i8* %nval, i32 %x, i32 %y)</tt>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a>
-</div>
+<h4>
+  <a name="int_it">
+    '<tt>llvm.init.trampoline</tt>' Intrinsic
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7004,12 +7141,14 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_atomics">Atomic Operations and Synchronization Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>These intrinsic functions expand the "universal IR" of LLVM to represent
    hardware constructs for atomic operations and memory synchronization.  This
@@ -7029,13 +7168,12 @@ LLVM</a>.</p>
    No one model or paradigm should be selected above others unless the hardware
    itself ubiquitously does so.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 <h5>Syntax:</h5>
 <pre>
   declare void @llvm.memory.barrier(i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;, i1 &lt;device&gt;)
@@ -7103,11 +7241,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_atomic_cmp_swap">'<tt>llvm.atomic.cmp.swap.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.cmp.swap</tt> on
@@ -7163,10 +7301,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_atomic_swap">'<tt>llvm.atomic.swap.*</tt>' Intrinsic</a>
-</div>
-<div class="doc_text">
+</h4>
+
+<div>
 <h5>Syntax:</h5>
 
 <p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.swap</tt> on any
@@ -7219,12 +7358,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_atomic_load_add">'<tt>llvm.atomic.load.add.*</tt>' Intrinsic</a>
+</h4>
 
-</div>
-
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.add</tt> on
@@ -7269,12 +7407,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_atomic_load_sub">'<tt>llvm.atomic.load.sub.*</tt>' Intrinsic</a>
+</h4>
 
-</div>
-
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.sub</tt> on
@@ -7321,14 +7458,25 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_atomic_load_and">'<tt>llvm.atomic.load.and.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_nand">'<tt>llvm.atomic.load.nand.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_or">'<tt>llvm.atomic.load.or.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_xor">'<tt>llvm.atomic.load.xor.*</tt>' Intrinsic</a><br>
-</div>
-
-<div class="doc_text">
+<h4>
+  <a name="int_atomic_load_and">
+    '<tt>llvm.atomic.load.and.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_nand">
+    '<tt>llvm.atomic.load.nand.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_or">
+    '<tt>llvm.atomic.load.or.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_xor">
+    '<tt>llvm.atomic.load.xor.*</tt>' Intrinsic
+  </a>
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <p>These are overloaded intrinsics. You can
@@ -7401,14 +7549,25 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="int_atomic_load_max">'<tt>llvm.atomic.load.max.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_min">'<tt>llvm.atomic.load.min.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_umax">'<tt>llvm.atomic.load.umax.*</tt>' Intrinsic</a><br>
-  <a name="int_atomic_load_umin">'<tt>llvm.atomic.load.umin.*</tt>' Intrinsic</a><br>
-</div>
-
-<div class="doc_text">
+<h4>
+  <a name="int_atomic_load_max">
+    '<tt>llvm.atomic.load.max.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_min">
+    '<tt>llvm.atomic.load.min.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_umax">
+    '<tt>llvm.atomic.load.umax.*</tt>' Intrinsic
+  </a>
+  <br>
+  <a name="int_atomic_load_umin">
+    '<tt>llvm.atomic.load.umin.*</tt>' Intrinsic
+  </a>
+</h4>
+
+<div>
 
 <h5>Syntax:</h5>
 <p>These are overloaded intrinsics. You can use <tt>llvm.atomic.load_max</tt>,
@@ -7479,25 +7638,24 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_memorymarkers">Memory Use Markers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This class of intrinsics exists to information about the lifetime of memory
    objects and ranges where variables are immutable.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7523,11 +7681,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7552,11 +7710,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7580,11 +7738,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7606,24 +7764,24 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="int_general">General Intrinsics</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This class of intrinsics is designed to be generic and has no specific
    purpose.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_var_annotation">'<tt>llvm.var.annotation</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7647,11 +7805,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_annotation">'<tt>llvm.annotation.*</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <p>This is an overloaded intrinsic. You can use '<tt>llvm.annotation</tt>' on
@@ -7683,11 +7841,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_trap">'<tt>llvm.trap</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7708,11 +7866,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_stackprotector">'<tt>llvm.stackprotector</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7742,11 +7900,11 @@ LLVM</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="int_objectsize">'<tt>llvm.objectsize</tt>' Intrinsic</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <h5>Syntax:</h5>
 <pre>
@@ -7776,6 +7934,10 @@ LLVM</a>.</p>
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -7785,8 +7947,8 @@ LLVM</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-24 22:01:34 +0100 (Thu, 24 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/Lexicon.html b/docs/Lexicon.html
index 09fae39..449e26e 100644
--- a/docs/Lexicon.html
+++ b/docs/Lexicon.html
@@ -9,12 +9,12 @@
   content="A glossary of terms used with the LLVM project.">
 </head>
 <body>
-<div class="doc_title">The LLVM Lexicon</div>
+<h1>The LLVM Lexicon</h1>
 <p class="doc_warning">NOTE: This document is a work in progress!</p>
 <!-- *********************************************************************** -->
-<div class="doc_section">Table Of Contents</div>
+<h2>Table Of Contents</h2>
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
   <table>
     <tr><th colspan="8"><b>- <a href="#A">A</a> -</b></th></tr>
     <tr>
@@ -83,19 +83,20 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">Definitions</div>
+<h2>Definitions</h2>
 <!-- *********************************************************************** -->
+<div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="A">- A -</a></div>
-<div class="doc_text">
+<h3><a name="A">- A -</a></h3>
+<div>
   <dl>
     <dt><a name="ADCE"><b>ADCE</b></a></dt>
     <dd>Aggressive Dead Code Elimination</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="B">- B -</a></div>
-<div class="doc_text">
+<h3><a name="B">- B -</a></h3>
+<div>
   <dl>
     <dt><a name="BURS"><b>BURS</b></a></dt>
     <dd>Bottom Up Rewriting System&mdash;A method of instruction selection for
@@ -104,8 +105,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="C">- C -</a></div>
-<div class="doc_text">
+<h3><a name="C">- C -</a></h3>
+<div>
   <dl>
     <dt><a name="CSE"><b>CSE</b></a></dt>
     <dd>Common Subexpression Elimination. An optimization that removes common
@@ -116,8 +117,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="D">- D -</a></div>
-<div class="doc_text">
+<h3><a name="D">- D -</a></h3>
+<div>
   <dl>
     <dt><a name="DAG"><b>DAG</b></a></dt>
     <dd>Directed Acyclic Graph</dd>
@@ -136,8 +137,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="G">- G -</a></div>
-<div class="doc_text">
+<h3><a name="G">- G -</a></h3>
+<div>
   <dl>
     <dt><a name="GC"><b>GC</b></a></dt>
     <dd>Garbage Collection. The practice of using reachability analysis instead
@@ -145,8 +146,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="H">- H -</a></div>
-<div class="doc_text">
+<h3><a name="H">- H -</a></h3>
+<div>
   <dl>
     <dt><a name="Heap"><b>Heap</b></a></dt>
     <dd>In garbage collection, the region of memory which is managed using
@@ -154,8 +155,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="I">- I -</a></div>
-<div class="doc_text">
+<h3><a name="I">- I -</a></h3>
+<div>
   <dl>
     <dt><a name="IPA"><b>IPA</b></a></dt>
     <dd>Inter-Procedural Analysis. Refers to any variety of code analysis that
@@ -169,8 +170,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="L">- L -</a></div>
-<div class="doc_text">
+<h3><a name="L">- L -</a></h3>
+<div>
   <dl>
     <dt><a name="LCSSA"><b>LCSSA</b></a></dt>
     <dd>Loop-Closed Static Single Assignment Form</dd>
@@ -183,16 +184,16 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="M">- M -</a></div>
-<div class="doc_text">
+<h3><a name="M">- M -</a></h3>
+<div>
   <dl>
     <dt><a name="MC"><b>MC</b></a></dt>
     <dd>Machine Code</dd>
   </dl>
 </div>
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="O">- O -</a></div>
-<div class="doc_text">
+<h3><a name="O">- O -</a></h3>
+<div>
   <dl>
     <dt><a name="Object_Pointer"><b>Object Pointer</b></a></dt>
     <dd>A pointer to an object such that the garbage collector is able to trace
@@ -202,8 +203,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="P">- P -</a></div>
-<div class="doc_text">
+<h3><a name="P">- P -</a></h3>
+<div>
   <dl>
     <dt><a name="PRE"><b>PRE</b></a></dt>
     <dd>Partial Redundancy Elimination</dd>
@@ -211,8 +212,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="R">- R -</a></div>
-<div class="doc_text">
+<h3><a name="R">- R -</a></h3>
+<div>
   <dl>
   	<dt><a name="RAUW"><b>RAUW</b></a></dt> <dd>An abbreviation for Replace
   	All Uses With. The functions User::replaceUsesOfWith(), 
@@ -234,8 +235,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="S">- S -</a></div>
-<div class="doc_text">
+<h3><a name="S">- S -</a></h3>
+<div>
   <dl>
     <dt><a name="Safe_Point"><b>Safe Point</b></a></dt>
     <dd>In garbage collection, it is necessary to identify <a href="#Root">stack
@@ -261,6 +262,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
     function.</dd>
   </dl>
 </div>
+
+</div>
 <!-- *********************************************************************** -->
 <hr>
 <address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
@@ -268,8 +271,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
  href="http://validator.w3.org/check/referer"><img
  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a><a
  href="http://llvm.org/">The LLVM Team</a><br>
-<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 <!-- vim: sw=2
 -->
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index 3033474..289da23 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -6,9 +6,9 @@
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
 
-<div class="doc_title">
+<h1>
   LLVM Link Time Optimization: Design and Implementation
-</div>
+</h1>
 
 <ul>
   <li><a href="#desc">Description</a></li>
@@ -36,12 +36,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="desc">Description</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
 LLVM features powerful intermodular optimizations which can be used at link 
 time.  Link Time Optimization (LTO) is another name for intermodular optimization 
@@ -50,12 +50,12 @@ and design between the LTO optimizer and the linker.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="design">Design Philosophy</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
 The LLVM Link Time Optimizer provides complete transparency, while doing 
 intermodular optimization, in the compiler tool chain. Its main goal is to let 
@@ -69,14 +69,13 @@ the linker and LLVM optimizer helps to do optimizations that are not possible
 in other models. The linker input allows the optimizer to avoid relying on 
 conservative escape analysis.
 </p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="example1">Example of link time optimization</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>The following example illustrates the advantages of LTO's integrated
   approach and clean interface. This example requires a system linker which
   supports LTO through the interface described in this document.  Here,
@@ -145,11 +144,11 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="alternative_approaches">Alternative Approaches</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <dl>
     <dt><b>Compiler driver invokes link time optimizer separately.</b></dt>
     <dd>In this model the link time optimizer is not able to take advantage of 
@@ -175,12 +174,14 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
   </dl>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="multiphase">Multi-phase communication between libLTO and linker</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
   <p>The linker collects information about symbol defininitions and uses in 
   various link objects which is more accurate than any information collected 
   by other tools during typical build cycles.  The linker collects this 
@@ -192,14 +193,13 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
   Our goal is to take advantage of tight integration between the linker and 
   the optimizer by sharing this information during various linking phases.
 </p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="phase1">Phase 1 : Read LLVM Bitcode Files</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>The linker first reads all object files in natural order and collects 
   symbol information. This includes native object files as well as LLVM bitcode 
   files.  To minimize the cost to the linker in the case that all .o files
@@ -219,11 +219,11 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="phase2">Phase 2 : Symbol Resolution</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>In this stage, the linker resolves symbols using global symbol table. 
   It may report undefined symbol errors, read archive members, replace 
   weak symbols, etc.  The linker is able to do this seamlessly even though it 
@@ -233,10 +233,10 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="phase3">Phase 3 : Optimize Bitcode Files</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>After symbol resolution, the linker tells the LTO shared object which
   symbols are needed by native object files.  In the example above, the linker 
   reports that only <tt>foo1()</tt> is used by native object files using 
@@ -248,11 +248,11 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="phase4">Phase 4 : Symbol Resolution after optimization</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   <p>In this phase, the linker reads optimized a native object file and 
   updates the internal global symbol table to reflect any changes. The linker 
   also collects information about any changes in use of external symbols by 
@@ -264,12 +264,14 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
   bitcode files.</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
 <a name="lto">libLTO</a>
-</div>
+</h2>
 
-<div class="doc_text">
+<div>
   <p><tt>libLTO</tt> is a shared object that is part of the LLVM tools, and 
   is intended for use by a linker. <tt>libLTO</tt> provides an abstract C 
   interface to use the LLVM interprocedural optimizer without exposing details 
@@ -278,14 +280,13 @@ $ llvm-gcc a.o main.o -o main # &lt;-- standard link command without any modific
   be possible for a completely different compilation technology to provide
   a different libLTO that works with their object files and the standard
   linker tool.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="lto_module_t">lto_module_t</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>A non-native object file is handled via an <tt>lto_module_t</tt>.  
 The following functions allow the linker to check if a file (on disk
@@ -325,11 +326,11 @@ lto_module_get_symbol_attribute(lto_module_t, unsigned int)
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="lto_code_gen_t">lto_code_gen_t</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Once the linker has loaded each non-native object files into an
 <tt>lto_module_t</tt>, it can request libLTO to process them all and
@@ -371,6 +372,8 @@ of the native object files.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -381,8 +384,8 @@ of the native object files.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   Devang Patel and Nick Kledzik<br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-29 22:09:55 +0200 (Wed, 29 Sep 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index 6ceb09d..1e7c3469 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">LLVM Makefile Guide</div>
+<h1>LLVM Makefile Guide</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a></li>
@@ -77,10 +77,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction </a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
   <p>This document provides <em>usage</em> information about the LLVM makefile 
   system. While loosely patterned after the BSD makefile system, LLVM has taken 
   a departure from BSD in order to implement additional features needed by LLVM.
@@ -99,20 +99,19 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="general">General Concepts</a></div>
+<h2><a name="general">General Concepts</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
   <p>The LLVM Makefile System is the component of LLVM that is responsible for
   building the software, testing it,  generating distributions, checking those
   distributions, installing and uninstalling, etc. It consists of a several
   files throughout the source tree. These files and other general concepts are
   described in this section.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="projects">Projects</a></div>
-<div class="doc_text">
+<h3><a name="projects">Projects</a></h3>
+<div>
   <p>The LLVM Makefile System is quite generous. It not only builds its own
   software, but it can build yours too. Built into the system is knowledge of
   the <tt>llvm/projects</tt> directory. Any directory under <tt>projects</tt>
@@ -129,8 +128,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="varvalues">Variable Values</a></div>
-<div class="doc_text">
+<h3><a name="varvalues">Variable Values</a></h3>
+<div>
   <p>To use the makefile system, you simply create a file named 
   <tt>Makefile</tt> in your directory and declare values for certain variables. 
   The variables and values that you select determine what the makefile system
@@ -139,16 +138,15 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="including">Including Makefiles</a></div>
-<div class="doc_text">
+<h3><a name="including">Including Makefiles</a></h3>
+<div>
   <p>Setting variables alone is not enough. You must include into your Makefile
   additional files that provide the rules of the LLVM Makefile system. The 
   various files involved are described in the sections that follow.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="Makefile">Makefile</a></div>
-<div class="doc_text">
+<h4><a name="Makefile">Makefile</a></h4>
+<div>
   <p>Each directory to participate in the build needs to have a file named
   <tt>Makefile</tt>. This is the file first read by <tt>make</tt>. It has three
   sections:</p>
@@ -163,9 +161,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="Makefile.common">Makefile.common</a>
-</div>
-<div class="doc_text">
+<h4><a name="Makefile.common">Makefile.common</a></h4>
+<div>
   <p>Every project must have a <tt>Makefile.common</tt> file at its top source 
   directory. This file serves three purposes:</p>
   <ol>
@@ -182,9 +179,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="Makefile.config">Makefile.config</a>
-</div>
-<div class="doc_text">
+<h4><a name="Makefile.config">Makefile.config</a></h4>
+<div>
   <p>Every project must have a <tt>Makefile.config</tt> at the top of its
   <em>build</em> directory. This file is <b>generated</b> by the
   <tt>configure</tt> script from the pattern provided by the
@@ -196,8 +192,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="Makefile.rules">Makefile.rules</a></div>
-<div class="doc_text">
+<h4><a name="Makefile.rules">Makefile.rules</a></h4>
+<div>
   <p>This file, located at <tt>$(LLVM_SRC_ROOT)/Makefile.rules</tt> is the heart
   of the LLVM Makefile System. It provides all the logic, dependencies, and
   rules for building the targets supported by the system. What it does largely
@@ -205,9 +201,11 @@
   have been set <em>before</em> <tt>Makefile.rules</tt> is included.
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="Comments">Comments</a></div>
-<div class="doc_text">
+<h3><a name="Comments">Comments</a></h3>
+<div>
   <p>User Makefiles need not have comments in them unless the construction is
   unusual or it does not strictly follow the rules and patterns of the LLVM
   makefile system. Makefile comments are invoked with the pound (#) character.
@@ -215,19 +213,20 @@
   by <tt>make</tt>.</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="tutorial">Tutorial</a></div>
+<h2><a name="tutorial">Tutorial</a></h2>
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
   <p>This section provides some examples of the different kinds of modules you
   can build with the LLVM makefile system. In general, each directory you 
   provide will build a single object although that object may be composed of
   additionally compiled components.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="libraries">Libraries</a></div>
-<div class="doc_text">
+<h3><a name="libraries">Libraries</a></h3>
+<div>
   <p>Only a few variable definitions are needed to build a regular library.
   Normally, the makefile system will build all the software into a single
   <tt>libname.o</tt> (pre-linked) object. This means the library is not
@@ -256,11 +255,10 @@
   <tt>-load</tt> option. See the 
   <a href="WritingAnLLVMPass.html#makefile">WritingAnLLVMPass.html</a> document
   for an example of why you might want to do this.
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="BCModules">Bitcode Modules</a></div>
-<div class="doc_text">
+<h4><a name="BCModules">Bitcode Modules</a></h4>
+<div>
   <p>In some situations, it is desirable to build a single bitcode module from
   a variety of sources, instead of an archive, shared library, or bitcode 
   library. Bitcode modules can be specified in addition to any of the other
@@ -280,10 +278,10 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="LoadableModules">Loadable Modules</a>
-</div>
-<div class="doc_text">
+</h4>
+<div>
   <p>In some situations, you need to create a loadable module. Loadable modules
   can be loaded into programs like <tt>opt</tt> or <tt>llc</tt> to specify
   additional passes to run or targets to support.  Loadable modules are also
@@ -311,9 +309,11 @@
   library which is part of <tt>lib/System</tt> implementation.</p>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="tools">Tools</a></div>
-<div class="doc_text">
+<h3><a name="tools">Tools</a></h3>
+<div>
   <p>For building executable programs (tools), you must provide the name of the
   tool and the names of the libraries you wish to link with the tool. For
   example:</p>
@@ -344,11 +344,10 @@
   syntax is used. Note that in order to use the <tt>.a</tt> suffix, the library
   in question must have been built with the <tt>ARCHIVE_LIBRARY</tt> option set.
   </p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="JIT">JIT Tools</a></div>
-<div class="doc_text">
+<h4><a name="JIT">JIT Tools</a></h4>
+<div>
   <p>Many tools will want to use the JIT features of LLVM.  To do this, you
      simply specify that you want an execution 'engine', and the makefiles will
      automatically link in the appropriate JIT for the host or an interpreter
@@ -367,11 +366,15 @@
   </tt></pre>
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="targets">Targets Supported</a></div>
+<h2><a name="targets">Targets Supported</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
   <p>This section describes each of the targets that can be built using the LLVM
   Makefile system. Any target can be invoked from any directory but not all are
   applicable to a given directory (e.g. "check", "dist" and "install" will
@@ -426,11 +429,10 @@
       <td>Remove built objects from installation directory.
     </td></tr>
   </table>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="all">all (default)</a></div>
-<div class="doc_text">
+<h3><a name="all">all (default)</a></h3>
+<div>
   <p>When you invoke <tt>make</tt> with no arguments, you are implicitly
   instructing it to seek the "all" target (goal). This target is used for
   building the software recursively and will do different things in different 
@@ -440,15 +442,15 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="all-local">all-local</a></div>
-<div class="doc_text">
+<h3><a name="all-local">all-local</a></h3>
+<div>
   <p>This target is the same as <a href="#all">all</a> but it operates only on
   the current directory instead of recursively.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="check">check</a></div>
-<div class="doc_text">
+<h3><a name="check">check</a></h3>
+<div>
   <p>This target can be invoked from anywhere within a project's directories
   but always invokes the <a href="#check-local"><tt>check-local</tt></a> target 
   in the project's <tt>test</tt> directory, if it exists and has a 
@@ -464,8 +466,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="check-local">check-local</a></div>
-<div class="doc_text">
+<h3><a name="check-local">check-local</a></h3>
+<div>
   <p>This target should be implemented by the <tt>Makefile</tt> in the project's
   <tt>test</tt> directory. It is invoked by the <tt>check</tt> target elsewhere.
   Each project is free to define the actions of <tt>check-local</tt> as 
@@ -475,8 +477,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="clean">clean</a></div>
-<div class="doc_text">
+<h3><a name="clean">clean</a></h3>
+<div>
   <p>This target cleans the build directory, recursively removing all things
   that the Makefile builds. The cleaning rules have been made guarded so they 
   shouldn't go awry (via <tt>rm -f $(UNSET_VARIABLE)/*</tt> which will attempt
@@ -484,15 +486,15 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="clean-local">clean-local</a></div>
-<div class="doc_text">
+<h3><a name="clean-local">clean-local</a></h3>
+<div>
   <p>This target does the same thing as <tt>clean</tt> but only for the current
   (local) directory.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="dist">dist</a></div>
-<div class="doc_text">
+<h3><a name="dist">dist</a></h3>
+<div>
   <p>This target builds a distribution tarball. It first builds the entire
   project using the <tt>all</tt> target and then tars up the necessary files and
   compresses it. The generated tarball is sufficient for a casual source 
@@ -500,8 +502,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="dist-check">dist-check</a></div>
-<div class="doc_text">
+<h3><a name="dist-check">dist-check</a></h3>
+<div>
   <p>This target does the same thing as the <tt>dist</tt> target but also checks
   the distribution tarball. The check is made by unpacking the tarball to a new
   directory, configuring it, building it, installing it, and then verifying that
@@ -512,16 +514,16 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="dist-clean">dist-clean</a></div>
-<div class="doc_text">
+<h3><a name="dist-clean">dist-clean</a></h3>
+<div>
   <p>This is a special form of the <tt>clean</tt> clean target. It performs a
   normal <tt>clean</tt> but also removes things pertaining to building the
   distribution.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="install">install</a></div>
-<div class="doc_text">
+<h3><a name="install">install</a></h3>
+<div>
   <p>This target finalizes shared objects and executables and copies all
   libraries, headers, executables and documentation to the directory given 
   with the <tt>--prefix</tt> option to <tt>configure</tt>.  When completed, 
@@ -538,8 +540,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="preconditions">preconditions</a></div>
-<div class="doc_text">
+<h3><a name="preconditions">preconditions</a></h3>
+<div>
   <p>This utility target checks to see if the <tt>Makefile</tt> in the object
   directory is older than the <tt>Makefile</tt> in the source directory and
   copies it if so. It also reruns the <tt>configure</tt> script if that needs to
@@ -549,15 +551,15 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="printvars">printvars</a></div>
-<div class="doc_text">
+<h3><a name="printvars">printvars</a></h3>
+<div>
   <p>This utility target just causes the LLVM makefiles to print out some of 
   the makefile variables so that you can double check how things are set. </p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="reconfigure">reconfigure</a></div>
-<div class="doc_text">
+<h3><a name="reconfigure">reconfigure</a></h3>
+<div>
   <p>This utility target will force a reconfigure of LLVM or your project. It 
   simply runs <tt>$(PROJ_OBJ_ROOT)/config.status --recheck</tt> to rerun the
   configuration tests and rebuild the configured files. This isn't generally
@@ -566,8 +568,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="spotless">spotless</a></div>
-<div class="doc_text">
+<h3><a name="spotless">spotless</a></h3>
+<div>
   <p>This utility target, only available when <tt>$(PROJ_OBJ_ROOT)</tt> is not 
   the same as <tt>$(PROJ_SRC_ROOT)</tt>, will completely clean the
   <tt>$(PROJ_OBJ_ROOT)</tt> directory by removing its content entirely and 
@@ -578,8 +580,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="tags">tags</a></div>
-<div class="doc_text">
+<h3><a name="tags">tags</a></h3>
+<div>
   <p>This target will generate a <tt>TAGS</tt> file in the top-level source
   directory. It is meant for use with emacs, XEmacs, or ViM. The TAGS file
   provides an index of symbol definitions so that the editor can jump you to the
@@ -587,18 +589,20 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="uninstall">uninstall</a></div>
-<div class="doc_text">
+<h3><a name="uninstall">uninstall</a></h3>
+<div>
   <p>This target is the opposite of the <tt>install</tt> target. It removes the
   header, library and executable files from the installation directories. Note
   that the directories themselves are not removed because it is not guaranteed
   that LLVM is the only thing installing there (e.g. --prefix=/usr).</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="variables">Variables</a></div>
+<h2><a name="variables">Variables</a></h2>
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
   <p>Variables are used to tell the LLVM Makefile System what to do and to
   obtain information from it. Variables are also used internally by the LLVM
   Makefile System. Variable names that contain only the upper case alphabetic
@@ -606,11 +610,10 @@
   variables are internal to the LLVM Makefile System and should not be relied
   upon nor modified. The sections below describe how to use the LLVM Makefile 
   variables.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="setvars">Control Variables</a></div>
-<div class="doc_text">
+<h3><a name="setvars">Control Variables</a></h3>
+<div>
   <p>Variables listed in the table below should be set <em>before</em> the 
   inclusion of <a href="#Makefile.common"><tt>$(LEVEL)/Makefile.common</tt></a>.
   These variables provide input to the LLVM make system that tell it what to do 
@@ -762,8 +765,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="overvars">Override Variables</a></div>
-<div class="doc_text">
+<h3><a name="overvars">Override Variables</a></h3>
+<div>
   <p>Override variables can be used to override the default
   values provided by the LLVM makefile system. These variables can be set in 
   several ways:</p>
@@ -868,8 +871,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="getvars">Readable Variables</a></div>
-<div class="doc_text">
+<h3><a name="getvars">Readable Variables</a></h3>
+<div>
   <p>Variables listed in the table below can be used by the user's Makefile but
   should not be changed. Changing the value will generally cause the build to go
   wrong, so don't do it.</p>
@@ -939,8 +942,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="intvars">Internal Variables</a></div>
-<div class="doc_text">
+<h3><a name="intvars">Internal Variables</a></h3>
+<div>
   <p>Variables listed below are used by the LLVM Makefile System 
   and considered internal. You should not use these variables under any
   circumstances.</p>
@@ -1018,6 +1021,8 @@
   </tt></p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -1027,8 +1032,8 @@
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-10-22 14:54:34 +0200 (Fri, 22 Oct 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/Packaging.html b/docs/Packaging.html
index 7aa4a1c..b932972 100644
--- a/docs/Packaging.html
+++ b/docs/Packaging.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">Advice on Packaging LLVM</div>
+<h1>Advice on Packaging LLVM</h1>
 <ol>
   <li><a href="#overview">Overview</a></li>
   <li><a href="#compilation">Compile Flags</a></li>
@@ -17,9 +17,9 @@
 </ol>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="overview">Overview</a></div>
+<h2><a name="overview">Overview</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <p>LLVM sets certain default configure options to make sure our developers don't
 break things for constrained platforms.  These settings are not optimal for most
@@ -34,9 +34,9 @@ developed against each.
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="compilation">Compile Flags</a></div>
+<h2><a name="compilation">Compile Flags</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <p>LLVM runs much more quickly when it's optimized and assertions are removed.
 However, such a build is currently incompatible with users who build without
@@ -65,9 +65,9 @@ versions of LLVM in parallel.  The following configure flags are relevant:
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="cxx-features">C++ Features</a></div>
+<h2><a name="cxx-features">C++ Features</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>RTTI</dt><dd>LLVM disables RTTI by default.  Add <tt>REQUIRES_RTTI=1</tt>
@@ -78,9 +78,9 @@ versions of LLVM in parallel.  The following configure flags are relevant:
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="shared-library">Shared Library</a></div>
+<h2><a name="shared-library">Shared Library</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <p>Configure with <tt>--enable-shared</tt> to build
 <tt>libLLVM-<var>major</var>.<var>minor</var>.(so|dylib)</tt> and link the tools
@@ -89,9 +89,9 @@ against it.  This saves lots of binary size at the cost of some startup time.
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="deps">Dependencies</a></div>
+<h2><a name="deps">Dependencies</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
 
 <dl>
 <dt><tt>--enable-libffi</tt></dt><dd>Depend on <a
@@ -111,8 +111,8 @@ line numbers.</dd>
   src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/Passes.html b/docs/Passes.html
index fb2aff5..ca9602c 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -40,7 +40,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 
   -->
 
-<div class="doc_title">LLVM's Analysis and Transform Passes</div>
+<h1>LLVM's Analysis and Transform Passes</h1>
 
 <ol>
   <li><a href="#intro">Introduction</a></li>
@@ -55,8 +55,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_section"> <a name="intro">Introduction</a> </div>
-<div class="doc_text">
+<h2><a name="intro">Introduction</a></h2>
+<div>
   <p>This document serves as a high level summary of the optimization features 
   that LLVM provides. Optimizations are implemented as Passes that traverse some
   portion of a program to either collect information or transform the program.
@@ -69,15 +69,13 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
   bitcode are neither analysis nor transform passes.
   <p>The table below provides a quick summary of each pass and links to the more
   complete pass description later in the document.</p>
-</div>
-<div class="doc_text" >
+
 <table>
 <tr><th colspan="2"><b>ANALYSIS PASSES</b></th></tr>
 <tr><th>Option</th><th>Name</th></tr>
 <tr><td><a href="#aa-eval">-aa-eval</a></td><td>Exhaustive Alias Analysis Precision Evaluator</td></tr>
-<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (default AA impl)</td></tr>
+<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (stateless AA impl)</td></tr>
 <tr><td><a href="#basiccg">-basiccg</a></td><td>Basic CallGraph Construction</td></tr>
-<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
 <tr><td><a href="#count-aa">-count-aa</a></td><td>Count Alias Analysis Query Responses</td></tr>
 <tr><td><a href="#debug-aa">-debug-aa</a></td><td>AA use debugger</td></tr>
 <tr><td><a href="#domfrontier">-domfrontier</a></td><td>Dominance Frontier Construction</td></tr>
@@ -85,27 +83,23 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#dot-callgraph">-dot-callgraph</a></td><td>Print Call Graph to 'dot' file</td></tr>
 <tr><td><a href="#dot-cfg">-dot-cfg</a></td><td>Print CFG of function to 'dot' file</td></tr>
 <tr><td><a href="#dot-cfg-only">-dot-cfg-only</a></td><td>Print CFG of function to 'dot' file (with no function bodies)</td></tr>
-<tr><td><a href="#dot-dom">-dot-dom</a></td><td>Print dominator tree of function to 'dot' file</td></tr>
-<tr><td><a href="#dot-dom-only">-dot-dom-only</a></td><td>Print dominator tree of function to 'dot' file (with no function bodies)</td></tr>
-<tr><td><a href="#dot-postdom">-dot-postdom</a></td><td>Print post dominator tree of function to 'dot' file</td></tr>
-<tr><td><a href="#dot-postdom-only">-dot-postdom-only</a></td><td>Print post dominator tree of function to 'dot' file (with no function bodies)</td></tr>
+<tr><td><a href="#dot-dom">-dot-dom</a></td><td>Print dominance tree of function to 'dot' file</td></tr>
+<tr><td><a href="#dot-dom-only">-dot-dom-only</a></td><td>Print dominance tree of function to 'dot' file (with no function bodies)</td></tr>
+<tr><td><a href="#dot-postdom">-dot-postdom</a></td><td>Print postdominance tree of function to 'dot' file</td></tr>
+<tr><td><a href="#dot-postdom-only">-dot-postdom-only</a></td><td>Print postdominance tree of function to 'dot' file (with no function bodies)</td></tr>
 <tr><td><a href="#globalsmodref-aa">-globalsmodref-aa</a></td><td>Simple mod/ref analysis for globals</td></tr>
 <tr><td><a href="#instcount">-instcount</a></td><td>Counts the various types of Instructions</td></tr>
-<tr><td><a href="#interprocedural-aa-eval">-interprocedural-aa-eval</a></td><td>Exhaustive Interprocedural Alias Analysis Precision Evaluator</td></tr>
-<tr><td><a href="#interprocedural-basic-aa">-interprocedural-basic-aa</a></td><td>Interprocedural Basic Alias Analysis</td></tr>
 <tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr>
 <tr><td><a href="#iv-users">-iv-users</a></td><td>Induction Variable Users</td></tr>
 <tr><td><a href="#lazy-value-info">-lazy-value-info</a></td><td>Lazy Value Information Analysis</td></tr>
 <tr><td><a href="#lda">-lda</a></td><td>Loop Dependence Analysis</td></tr>
 <tr><td><a href="#libcall-aa">-libcall-aa</a></td><td>LibCall Alias Analysis</td></tr>
-<tr><td><a href="#lint">-lint</a></td><td>Check for common errors in LLVM IR</td></tr>
-<tr><td><a href="#live-values">-live-values</a></td><td>Value Liveness Analysis</td></tr>
+<tr><td><a href="#lint">-lint</a></td><td>Statically lint-checks LLVM IR</td></tr>
 <tr><td><a href="#loops">-loops</a></td><td>Natural Loop Information</td></tr>
 <tr><td><a href="#memdep">-memdep</a></td><td>Memory Dependence Analysis</td></tr>
-<tr><td><a href="#module-debuginfo">-module-debuginfo</a></td><td>Prints module debug info metadata</td></tr>
+<tr><td><a href="#module-debuginfo">-module-debuginfo</a></td><td>Decodes module-level debug info</td></tr>
 <tr><td><a href="#no-aa">-no-aa</a></td><td>No Alias Analysis (always returns 'may' alias)</td></tr>
 <tr><td><a href="#no-profile">-no-profile</a></td><td>No Profile Information</td></tr>
-<tr><td><a href="#pointertracking">-pointertracking</a></td><td>Track pointer bounds</td></tr>
 <tr><td><a href="#postdomfrontier">-postdomfrontier</a></td><td>Post-Dominance Frontier Construction</td></tr>
 <tr><td><a href="#postdomtree">-postdomtree</a></td><td>Post-Dominator Tree Construction</td></tr>
 <tr><td><a href="#print-alias-sets">-print-alias-sets</a></td><td>Alias Set Printer</td></tr>
@@ -120,8 +114,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#print-used-types">-print-used-types</a></td><td>Find Used Types</td></tr>
 <tr><td><a href="#profile-estimator">-profile-estimator</a></td><td>Estimate profiling information</td></tr>
 <tr><td><a href="#profile-loader">-profile-loader</a></td><td>Load profile information from llvmprof.out</td></tr>
-<tr><td><a href="#regions">-regions</a></td><td>Detect single entry single exit regions in a function</td></tr>
 <tr><td><a href="#profile-verifier">-profile-verifier</a></td><td>Verify profiling information</td></tr>
+<tr><td><a href="#regions">-regions</a></td><td>Detect single entry single exit regions</td></tr>
 <tr><td><a href="#scalar-evolution">-scalar-evolution</a></td><td>Scalar Evolution Analysis</td></tr>
 <tr><td><a href="#scev-aa">-scev-aa</a></td><td>ScalarEvolution-based Alias Analysis</td></tr>
 <tr><td><a href="#targetdata">-targetdata</a></td><td>Target Data Layout</td></tr>
@@ -129,13 +123,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 
 <tr><th colspan="2"><b>TRANSFORM PASSES</b></th></tr>
 <tr><th>Option</th><th>Name</th></tr>
-<tr><td><a href="#abcd">-abcd</a></td><td>Remove redundant conditional branches</td></tr>
 <tr><td><a href="#adce">-adce</a></td><td>Aggressive Dead Code Elimination</td></tr>
 <tr><td><a href="#always-inline">-always-inline</a></td><td>Inliner for always_inline functions</td></tr>
 <tr><td><a href="#argpromotion">-argpromotion</a></td><td>Promote 'by reference' arguments to scalars</td></tr>
 <tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
 <tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break critical edges in CFG</td></tr>
-<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Prepare a function for code generation </td></tr>
+<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
 <tr><td><a href="#constmerge">-constmerge</a></td><td>Merge Duplicate Global Constants</td></tr>
 <tr><td><a href="#constprop">-constprop</a></td><td>Simple constant propagation</td></tr>
 <tr><td><a href="#dce">-dce</a></td><td>Dead Code Elimination</td></tr>
@@ -155,46 +148,40 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#internalize">-internalize</a></td><td>Internalize Global Symbols</td></tr>
 <tr><td><a href="#ipconstprop">-ipconstprop</a></td><td>Interprocedural constant propagation</td></tr>
 <tr><td><a href="#ipsccp">-ipsccp</a></td><td>Interprocedural Sparse Conditional Constant Propagation</td></tr>
-<tr><td><a href="#jump-threading">-jump-threading</a></td><td>Thread control through conditional blocks </td></tr>
+<tr><td><a href="#jump-threading">-jump-threading</a></td><td>Jump Threading</td></tr>
 <tr><td><a href="#lcssa">-lcssa</a></td><td>Loop-Closed SSA Form Pass</td></tr>
 <tr><td><a href="#licm">-licm</a></td><td>Loop Invariant Code Motion</td></tr>
-<tr><td><a href="#loop-deletion">-loop-deletion</a></td><td>Dead Loop Deletion Pass </td></tr>
+<tr><td><a href="#loop-deletion">-loop-deletion</a></td><td>Delete dead loops</td></tr>
 <tr><td><a href="#loop-extract">-loop-extract</a></td><td>Extract loops into new functions</td></tr>
 <tr><td><a href="#loop-extract-single">-loop-extract-single</a></td><td>Extract at most one loop into a new function</td></tr>
-<tr><td><a href="#loop-index-split">-loop-index-split</a></td><td>Index Split Loops</td></tr>
 <tr><td><a href="#loop-reduce">-loop-reduce</a></td><td>Loop Strength Reduction</td></tr>
 <tr><td><a href="#loop-rotate">-loop-rotate</a></td><td>Rotate Loops</td></tr>
+<tr><td><a href="#loop-simplify">-loop-simplify</a></td><td>Canonicalize natural loops</td></tr>
 <tr><td><a href="#loop-unroll">-loop-unroll</a></td><td>Unroll loops</td></tr>
 <tr><td><a href="#loop-unswitch">-loop-unswitch</a></td><td>Unswitch loops</td></tr>
-<tr><td><a href="#loop-simplify">-loop-simplify</a></td><td>Canonicalize natural loops</td></tr>
-<tr><td><a href="#loweratomic">-loweratomic</a></td><td>Lower atomic intrinsics</td></tr>
+<tr><td><a href="#loweratomic">-loweratomic</a></td><td>Lower atomic intrinsics to non-atomic form</td></tr>
 <tr><td><a href="#lowerinvoke">-lowerinvoke</a></td><td>Lower invoke and unwind, for unwindless code generators</td></tr>
 <tr><td><a href="#lowersetjmp">-lowersetjmp</a></td><td>Lower Set Jump</td></tr>
 <tr><td><a href="#lowerswitch">-lowerswitch</a></td><td>Lower SwitchInst's to branches</td></tr>
 <tr><td><a href="#mem2reg">-mem2reg</a></td><td>Promote Memory to Register</td></tr>
-<tr><td><a href="#memcpyopt">-memcpyopt</a></td><td>Optimize use of memcpy and friends</td></tr>
+<tr><td><a href="#memcpyopt">-memcpyopt</a></td><td>MemCpy Optimization</td></tr>
 <tr><td><a href="#mergefunc">-mergefunc</a></td><td>Merge Functions</td></tr>
 <tr><td><a href="#mergereturn">-mergereturn</a></td><td>Unify function exit nodes</td></tr>
 <tr><td><a href="#partial-inliner">-partial-inliner</a></td><td>Partial Inliner</td></tr>
-<tr><td><a href="#partialspecialization">-partialspecialization</a></td><td>Partial Specialization</td></tr>
 <tr><td><a href="#prune-eh">-prune-eh</a></td><td>Remove unused exception handling info</td></tr>
 <tr><td><a href="#reassociate">-reassociate</a></td><td>Reassociate expressions</td></tr>
 <tr><td><a href="#reg2mem">-reg2mem</a></td><td>Demote all values to stack slots</td></tr>
-<tr><td><a href="#scalarrepl">-scalarrepl</a></td><td>Scalar Replacement of Aggregates</td></tr>
+<tr><td><a href="#scalarrepl">-scalarrepl</a></td><td>Scalar Replacement of Aggregates (DT)</td></tr>
 <tr><td><a href="#sccp">-sccp</a></td><td>Sparse Conditional Constant Propagation</td></tr>
-<tr><td><a href="#sink">-sink</a></td><td>Code Sinking</td></tr>
 <tr><td><a href="#simplify-libcalls">-simplify-libcalls</a></td><td>Simplify well-known library calls</td></tr>
-<tr><td><a href="#simplify-libcalls-halfpowr">-simplify-libcalls-halfpowr</a></td><td>Simplify half_powr library calls</td></tr>
 <tr><td><a href="#simplifycfg">-simplifycfg</a></td><td>Simplify the CFG</td></tr>
-<tr><td><a href="#split-geps">-split-geps</a></td><td>Split complex GEPs into simple GEPs</td></tr>
-<tr><td><a href="#ssi">-ssi</a></td><td>Static Single Information Construction</td></tr>
-<tr><td><a href="#ssi-everything">-ssi-everything</a></td><td>Static Single Information Construction (everything, intended for debugging)</td></tr>
+<tr><td><a href="#sink">-sink</a></td><td>Code sinking</td></tr>
+<tr><td><a href="#sretpromotion">-sretpromotion</a></td><td>Promote sret arguments to multiple ret values</td></tr>
 <tr><td><a href="#strip">-strip</a></td><td>Strip all symbols from a module</td></tr>
 <tr><td><a href="#strip-dead-debug-info">-strip-dead-debug-info</a></td><td>Strip debug info for unused symbols</td></tr>
-<tr><td><a href="#strip-dead-prototypes">-strip-dead-prototypes</a></td><td>Remove unused function declarations</td></tr>
+<tr><td><a href="#strip-dead-prototypes">-strip-dead-prototypes</a></td><td>Strip Unused Function Prototypes</td></tr>
 <tr><td><a href="#strip-debug-declare">-strip-debug-declare</a></td><td>Strip all llvm.dbg.declare intrinsics</td></tr>
 <tr><td><a href="#strip-nondebug">-strip-nondebug</a></td><td>Strip all symbols, except dbg symbols, from a module</td></tr>
-<tr><td><a href="#sretpromotion">-sretpromotion</a></td><td>Promote sret arguments</td></tr>
 <tr><td><a href="#tailcallelim">-tailcallelim</a></td><td>Tail Call Elimination</td></tr>
 <tr><td><a href="#tailduplicate">-tailduplicate</a></td><td>Tail Duplication</td></tr>
 
@@ -208,24 +195,24 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#verify">-verify</a></td><td>Module Verifier</td></tr>
 <tr><td><a href="#view-cfg">-view-cfg</a></td><td>View CFG of function</td></tr>
 <tr><td><a href="#view-cfg-only">-view-cfg-only</a></td><td>View CFG of function (with no function bodies)</td></tr>
-<tr><td><a href="#view-dom">-view-dom</a></td><td>View dominator tree of function</td></tr>
-<tr><td><a href="#view-dom-only">-view-dom-only</a></td><td>View dominator tree of function (with no function bodies)</td></tr>
-<tr><td><a href="#view-postdom">-view-postdom</a></td><td>View post dominator tree of function</td></tr>
-<tr><td><a href="#view-postdom-only">-view-postdom-only</a></td><td>View post dominator tree of function (with no function bodies)</td></tr>
+<tr><td><a href="#view-dom">-view-dom</a></td><td>View dominance tree of function</td></tr>
+<tr><td><a href="#view-dom-only">-view-dom-only</a></td><td>View dominance tree of function (with no function bodies)</td></tr>
+<tr><td><a href="#view-postdom">-view-postdom</a></td><td>View postdominance tree of function</td></tr>
+<tr><td><a href="#view-postdom-only">-view-postdom-only</a></td><td>View postdominance tree of function (with no function bodies)</td></tr>
 </table>
+
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_section"> <a name="example">Analysis Passes</a></div>
-<div class="doc_text">
+<h2><a name="analyses">Analysis Passes</a></h2>
+<div>
   <p>This section describes the LLVM Analysis Passes.</p>
-</div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="aa-eval">-aa-eval: Exhaustive Alias Analysis Precision Evaluator</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This is a simple N^2 alias analysis accuracy evaluator.
   Basically, for each function in the program, it simply queries to see how the
   alias analysis implementation answers alias queries between each pair of
@@ -236,10 +223,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="basicaa">-basicaa: Basic Alias Analysis (default AA impl)</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="basicaa">-basicaa: Basic Alias Analysis (stateless AA impl)</a>
+</h3>
+<div>
   <p>
   This is the default implementation of the Alias Analysis interface
   that simply implements a few identities (two different globals cannot alias,
@@ -248,30 +235,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="basiccg">-basiccg: Basic CallGraph Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Yet to be written.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="codegenprepare">-codegenprepare: Optimize for code generation</a>
-</div>
-<div class="doc_text">
-  <p>
-  This pass munges the code in the input function to better prepare it for
-  SelectionDAG-based code generation.  This works around limitations in it's
-  basic-block-at-a-time approach.  It should eventually be removed.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="count-aa">-count-aa: Count Alias Analysis Query Responses</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   A pass which can be used to count how many alias queries
   are being made and how the alias analysis implementation being used responds.
@@ -279,10 +254,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="debug-aa">-debug-aa: AA use debugger</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This simple pass checks alias analysis users to ensure that if they
   create a new value, they do not query AA without informing it of the value.
@@ -296,10 +271,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="domfrontier">-domfrontier: Dominance Frontier Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is a simple dominator construction algorithm for finding forward
   dominator frontiers.
@@ -307,10 +282,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="domtree">-domtree: Dominator Tree Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is a simple dominator construction algorithm for finding forward
   dominators.
@@ -318,10 +293,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="dot-callgraph">-dot-callgraph: Print Call Graph to 'dot' file</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the call graph into a
   <code>.dot</code> graph.  This graph can then be processed with the "dot" tool
@@ -330,10 +305,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="dot-cfg">-dot-cfg: Print CFG of function to 'dot' file</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the control flow graph
   into a <code>.dot</code> graph.  This graph can then be processed with the
@@ -342,10 +317,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="dot-cfg-only">-dot-cfg-only: Print CFG of function to 'dot' file (with no function bodies)</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the control flow graph
   into a <code>.dot</code> graph, omitting the function bodies.  This graph can
@@ -355,10 +330,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="dot-dom">-dot-dom: Print dominator tree of function to 'dot' file</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="dot-dom">-dot-dom: Print dominance tree of function to 'dot' file</a>
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the dominator tree
   into a <code>.dot</code> graph.  This graph can then be processed with the
@@ -367,11 +342,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="dot-dom-only">-dot-dom-only: Print dominator tree of function to 'dot' file (with no
-  function bodies)</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="dot-dom-only">-dot-dom-only: Print dominance tree of function to 'dot' file (with no function bodies)</a>
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the dominator tree
   into a <code>.dot</code> graph, omitting the function bodies.  This graph can
@@ -381,10 +355,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="dot-postdom">-dot-postdom: Print post dominator tree of function to 'dot' file</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="dot-postdom">-dot-postdom: Print postdominance tree of function to 'dot' file</a>
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the post dominator tree
   into a <code>.dot</code> graph.  This graph can then be processed with the
@@ -393,11 +367,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="dot-postdom-only">-dot-postdom-only: Print post dominator tree of function to 'dot' file
-  (with no function bodies)</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="dot-postdom-only">-dot-postdom-only: Print postdominance tree of function to 'dot' file (with no function bodies)</a>
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the post dominator tree
   into a <code>.dot</code> graph, omitting the function bodies.  This graph can
@@ -407,10 +380,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="globalsmodref-aa">-globalsmodref-aa: Simple mod/ref analysis for globals</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This simple pass provides alias and mod/ref information for global values
   that do not have their address taken, and keeps track of whether functions
@@ -420,43 +393,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="instcount">-instcount: Counts the various types of Instructions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass collects the count of all instructions and reports them
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="interprocedural-aa-eval">-interprocedural-aa-eval: Exhaustive Interprocedural Alias Analysis Precision Evaluator</a>
-</div>
-<div class="doc_text">
-  <p>This pass implements a simple N^2 alias analysis accuracy evaluator.
-  Basically, for each function in the program, it simply queries to see how the
-  alias analysis implementation answers alias queries between each pair of
-  pointers in the function.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="interprocedural-basic-aa">-interprocedural-basic-aa: Interprocedural Basic Alias Analysis</a>
-</div>
-<div class="doc_text">
-  <p>This pass defines the default implementation of the Alias Analysis interface
-  that simply implements a few identities (two different globals cannot alias,
-  etc), but otherwise does no analysis.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="intervals">-intervals: Interval Partition Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This analysis calculates and represents the interval partition of a function,
   or a preexisting interval partition.
@@ -469,44 +419,44 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="iv-users">-iv-users: Induction Variable Users</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Bookkeeping for "interesting" users of expressions computed from 
   induction variables.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lazy-value-info">-lazy-value-info: Lazy Value Information Analysis</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Interface for lazy computation of value constraint information.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lda">-lda: Loop Dependence Analysis</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Loop dependence analysis framework, which is used to detect dependences in
   memory accesses in loops.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="libcall-aa">-libcall-aa: LibCall Alias Analysis</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>LibCall Alias Analysis.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="lint">-lint: Check for common errors in LLVM IR</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="lint">-lint: Statically lint-checks LLVM IR</a>
+</h3>
+<div>
   <p>This pass statically checks for common and easily-identified constructs
   which produce undefined or likely unintended behavior in LLVM IR.</p>
  
@@ -534,18 +484,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="live-values">-live-values: Values Liveness Analysis</a>
-</div>
-<div class="doc_text">
-  <p>LLVM IR Value liveness analysis pass.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loops">-loops: Natural Loop Construction</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="loops">-loops: Natural Loop Information</a>
+</h3>
+<div>
   <p>
   This analysis is used to identify natural loops and determine the loop depth
   of various nodes of the CFG.  Note that the loops identified may actually be
@@ -555,10 +497,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="memdep">-memdep: Memory Dependence Analysis</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   An analysis that determines, for a given memory operation, what preceding 
   memory operations it depends on.  It builds on alias analysis information, and 
@@ -568,10 +510,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="module-debuginfo">-module-debuginfo: Prints module debug info metadata</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="module-debuginfo">-module-debuginfo: Decodes module-level debug info</a>
+</h3>
+<div>
   <p>This pass decodes the debug info metadata in a module and prints in a
  (sufficiently-prepared-) human-readable form.
 
@@ -581,10 +523,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="no-aa">-no-aa: No Alias Analysis (always returns 'may' alias)</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Always returns "I don't know" for alias queries.  NoAA is unlike other alias
   analysis implementations, in that it does not chain to a previous analysis. As
@@ -593,10 +535,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="no-profile">-no-profile: No Profile Information</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   The default "no profile" implementation of the abstract
   <code>ProfileInfo</code> interface.
@@ -604,19 +546,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="pointertracking">-pointertracking: Track pointer bounds.</a>
-</div>
-<div class="doc_text">
-  <p>Tracking of pointer bounds.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="postdomfrontier">-postdomfrontier: Post-Dominance Frontier Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is a simple post-dominator construction algorithm for finding
   post-dominator frontiers.
@@ -624,10 +557,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="postdomtree">-postdomtree: Post-Dominator Tree Construction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is a simple post-dominator construction algorithm for finding
   post-dominators.
@@ -635,18 +568,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-alias-sets">-print-alias-sets: Alias Set Printer</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Yet to be written.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-callgraph">-print-callgraph: Print a call graph</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the call graph to
   standard error in a human-readable form.
@@ -654,10 +587,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-callgraph-sccs">-print-callgraph-sccs: Print SCCs of the Call Graph</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the SCCs of the call
   graph to standard error in a human-readable form.
@@ -665,10 +598,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-cfg-sccs">-print-cfg-sccs: Print SCCs of each function CFG</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints the SCCs of each
   function CFG to standard error in a human-readable form.
@@ -676,10 +609,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-dbginfo">-print-dbginfo: Print debug info in human readable form</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Pass that prints instructions, and associated debug info:</p>
   <ul>
   
@@ -690,18 +623,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-dom-info">-print-dom-info: Dominator Info Printer</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Dominator Info Printer.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-externalfnconstants">-print-externalfnconstants: Print external fn callsites passed constants</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass, only available in <code>opt</code>, prints out call sites to
   external functions that are called with constant arguments.  This can be
@@ -711,10 +644,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-function">-print-function: Print function to stderr</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   The <code>PrintFunctionPass</code> class is designed to be pipelined with
   other <code>FunctionPass</code>es, and prints out the functions of the module
@@ -723,20 +656,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-module">-print-module: Print module to stderr</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass simply prints out the entire module when it is executed.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="print-used-types">-print-used-types: Find Used Types</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is used to seek out all of the types in use by the program.  Note
   that this analysis explicitly does not include types only used by the symbol
@@ -744,20 +677,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="profile-estimator">-profile-estimator: Estimate profiling information</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Profiling information that estimates the profiling information 
   in a very crude and unimaginative way.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="profile-loader">-profile-loader: Load profile information from llvmprof.out</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   A concrete implementation of profiling information that loads the information
   from a profile dump file.
@@ -765,16 +698,16 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="profile-verifier">-profile-verifier: Verify profiling information</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Pass that checks profiling information for plausibility.</p>
 </div>
-<div class="doc_subsection">
-  <a name="regions">-regions: Detect single entry single exit regions in a function</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="regions">-regions: Detect single entry single exit regions</a>
+</h3>
+<div>
   <p>
   The <code>RegionInfo</code> pass detects single entry single exit regions in a
   function, where a region is defined as any subgraph that is connected to the
@@ -784,10 +717,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="scalar-evolution">-scalar-evolution: Scalar Evolution Analysis</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   The <code>ScalarEvolution</code> analysis can be used to analyze and
   catagorize scalar expressions in loops.  It specializes in recognizing general
@@ -803,10 +736,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="scev-aa">-scev-aa: </a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="scev-aa">-scev-aa: ScalarEvolution-based Alias Analysis</a>
+</h3>
+<div>
   <p>Simple alias analysis implemented in terms of ScalarEvolution queries.
  
   This differs from traditional loop dependence analysis in that it tests
@@ -819,62 +752,26 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="strip-dead-debug-info">-strip-dead-debug-info: Strip debug info for unused symbols</a>
-</div>
-<div class="doc_text">
-  <p>
-  performs code stripping. this transformation can delete:
-  </p>
-  
-  <ol>
-    <li>names for virtual registers</li>
-    <li>symbols for internal globals and functions</li>
-    <li>debug information</li>
-  </ol>
-  
-  <p>
-  note that this transformation makes code much less readable, so it should
-  only be used in situations where the <tt>strip</tt> utility would be used,
-  such as reducing code size or making it harder to reverse engineer code.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="targetdata">-targetdata: Target Data Layout</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>Provides other passes access to information on how the size and alignment
   required by the the target ABI for various data types.</p>
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_section"> <a name="transform">Transform Passes</a></div>
-<div class="doc_text">
-  <p>This section describes the LLVM Transform Passes.</p>
 </div>
 
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="abcd">-abcd: Remove redundant conditional branches</a>
-</div>
-<div class="doc_text">
-  <p>ABCD removes conditional branch instructions that can be proved redundant.
-  With the SSI representation, each variable has a constraint. By analyzing these 
-  constraints we can prove that a branch is redundant. When a branch is proved 
-  redundant it means that one direction will always be taken; thus, we can change 
-  this branch into an unconditional jump.</p>
-  <p>It is advisable to run <a href="#simplifycfg">SimplifyCFG</a> and 
-  <a href="#adce">Aggressive Dead Code Elimination</a> after ABCD 
-  to clean up the code.</p>
-</div>
+<!-- ======================================================================= -->
+<h2><a name="transforms">Transform Passes</a></h2>
+<div>
+  <p>This section describes the LLVM Transform Passes.</p>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="adce">-adce: Aggressive Dead Code Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>ADCE aggressively tries to eliminate code. This pass is similar to
   <a href="#dce">DCE</a> but it assumes that values are dead until proven 
   otherwise. This is similar to <a href="#sccp">SCCP</a>, except applied to 
@@ -882,19 +779,19 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="always-inline">-always-inline: Inliner for always_inline functions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>A custom inliner that handles only functions that are marked as 
   "always inline".</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="argpromotion">-argpromotion: Promote 'by reference' arguments to scalars</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass promotes "by reference" arguments to be "by value" arguments.  In
   practice, this means looking for internal functions that have pointer
@@ -922,10 +819,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="block-placement">-block-placement: Profile Guided Basic Block Placement</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass is a very simple profile guided basic block placement algorithm.
   The idea is to put frequently executed blocks together at the start of the
   function and hopefully increase the number of fall-through conditional
@@ -934,10 +831,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="break-crit-edges">-break-crit-edges: Break critical edges in CFG</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Break all of the critical edges in the CFG by inserting a dummy basic block.
   It may be "required" by passes that cannot deal with critical edges. This
@@ -947,20 +844,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="codegenprepare">-codegenprepare: Prepare a function for code generation</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="codegenprepare">-codegenprepare: Optimize for code generation</a>
+</h3>
+<div>
   This pass munges the code in the input function to better prepare it for
   SelectionDAG-based code generation. This works around limitations in it's
   basic-block-at-a-time approach. It should eventually be removed.
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="constmerge">-constmerge: Merge Duplicate Global Constants</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Merges duplicate global constants together into a single constant that is
   shared.  This is useful because some passes (ie TraceValues) insert a lot of
@@ -970,10 +867,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="constprop">-constprop: Simple constant propagation</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This file implements constant propagation and merging. It looks for
   instructions involving only constant operands and replaces them with a
   constant value instead of an instruction. For example:</p>
@@ -986,10 +883,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="dce">-dce: Dead Code Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Dead code elimination is similar to <a href="#die">dead instruction
   elimination</a>, but it rechecks instructions that were used by removed
@@ -998,10 +895,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="deadargelim">-deadargelim: Dead Argument Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass deletes dead arguments from internal functions.  Dead argument
   elimination removes arguments which are directly dead, as well as arguments
@@ -1016,10 +913,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="deadtypeelim">-deadtypeelim: Dead Type Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is used to cleanup the output of GCC.  It eliminate names for types
   that are unused in the entire translation unit, using the <a
@@ -1028,10 +925,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="die">-die: Dead Instruction Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Dead instruction elimination performs a single pass over the function,
   removing instructions that are obviously dead.
@@ -1039,10 +936,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="dse">-dse: Dead Store Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   A trivial dead store elimination that only considers basic-block local
   redundant stores.
@@ -1050,10 +947,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="functionattrs">-functionattrs: Deduce function attributes</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>A simple interprocedural pass which walks the call-graph, looking for 
   functions which do not access or only read non-local memory, and marking them 
   readnone/readonly.  In addition, it marks function arguments (of pointer type) 
@@ -1065,10 +962,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="globaldce">-globaldce: Dead Global Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This transform is designed to eliminate unreachable internal globals from the
   program.  It uses an aggressive algorithm, searching out globals that are
@@ -1079,10 +976,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="globalopt">-globalopt: Global Variable Optimizer</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass transforms simple global variables that never have their address
   taken.  If obviously true, it marks read/write globals as constant, deletes
@@ -1091,10 +988,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="gvn">-gvn: Global Value Numbering</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass performs global value numbering to eliminate fully and partially
   redundant instructions.  It also performs redundant load elimination.
@@ -1102,10 +999,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="indvars">-indvars: Canonicalize Induction Variables</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This transformation analyzes and transforms the induction variables (and
   computations derived from them) into simpler forms suitable for subsequent
@@ -1153,20 +1050,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="inline">-inline: Function Integration/Inlining</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Bottom-up inlining of functions into callees.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="insert-edge-profiling">-insert-edge-profiling: Insert instrumentation for edge profiling</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass instruments the specified program with counters for edge profiling.
   Edge profiling can give a reasonable approximation of the hot paths through a
@@ -1181,10 +1078,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="insert-optimal-edge-profiling">-insert-optimal-edge-profiling: Insert optimal instrumentation for edge profiling</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass instruments the specified program with counters for edge profiling.
   Edge profiling can give a reasonable approximation of the hot paths through a
   program, and is used for a wide variety of program transformations.
@@ -1192,10 +1089,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="instcombine">-instcombine: Combine redundant instructions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Combine instructions to form fewer, simple
   instructions.  This pass does not modify the CFG This pass is where algebraic
@@ -1246,10 +1143,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="internalize">-internalize: Internalize Global Symbols</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass loops over all of the functions in the input module, looking for a
   main function.  If a main function is found, all other functions and all
@@ -1258,10 +1155,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="ipconstprop">-ipconstprop: Interprocedural constant propagation</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass implements an <em>extremely</em> simple interprocedural constant
   propagation pass.  It could certainly be improved in many different ways,
@@ -1272,10 +1169,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="ipsccp">-ipsccp: Interprocedural Sparse Conditional Constant Propagation</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   An interprocedural variant of <a href="#sccp">Sparse Conditional Constant 
   Propagation</a>.
@@ -1283,10 +1180,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="jump-threading">-jump-threading: Thread control through conditional blocks</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="jump-threading">-jump-threading: Jump Threading</a>
+</h3>
+<div>
   <p>
   Jump threading tries to find distinct threads of control flow running through
   a basic block. This pass looks at blocks that have multiple predecessors and
@@ -1312,10 +1209,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lcssa">-lcssa: Loop-Closed SSA Form Pass</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass transforms loops by placing phi nodes at the end of the loops for
   all values that are live across the loop boundary.  For example, it turns
@@ -1341,10 +1238,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="licm">-licm: Loop Invariant Code Motion</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass performs loop invariant code motion, attempting to remove as much
   code from the body of a loop as possible.  It does this by either hoisting
@@ -1376,11 +1273,12 @@ if (X &lt; 3) {</pre>
         variable.</li>
   </ul>
 </div>
+
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loop-deletion">-loop-deletion: Dead Loop Deletion Pass</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="loop-deletion">-loop-deletion: Delete dead loops</a>
+</h3>
+<div>
   <p>
   This file implements the Dead Loop Deletion Pass.  This pass is responsible
   for eliminating loops with non-infinite computable trip counts that have no
@@ -1390,10 +1288,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="loop-extract">-loop-extract: Extract loops into new functions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   A pass wrapper around the <code>ExtractLoop()</code> scalar transformation to 
   extract each top-level loop into its own new function. If the loop is the
@@ -1403,10 +1301,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="loop-extract-single">-loop-extract-single: Extract at most one loop into a new function</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Similar to <a href="#loop-extract">Extract loops into new functions</a>,
   this pass extracts one natural loop from the program into a function if it
@@ -1415,21 +1313,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loop-index-split">-loop-index-split: Index Split Loops</a>
-</div>
-<div class="doc_text">
-  <p>
-  This pass divides loop's iteration range by spliting loop such that each 
-  individual loop is executed efficiently.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="loop-reduce">-loop-reduce: Loop Strength Reduction</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass performs a strength reduction on array references inside loops that
   have as one or more of their components the loop induction variable.  This is
@@ -1440,60 +1327,18 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="loop-rotate">-loop-rotate: Rotate Loops</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>A simple loop rotation transformation.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loop-unroll">-loop-unroll: Unroll loops</a>
-</div>
-<div class="doc_text">
-  <p>
-  This pass implements a simple loop unroller.  It works best when loops have
-  been canonicalized by the <a href="#indvars"><tt>-indvars</tt></a> pass,
-  allowing it to determine the trip counts of loops easily.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loop-unswitch">-loop-unswitch: Unswitch loops</a>
-</div>
-<div class="doc_text">
-  <p>
-  This pass transforms loops that contain branches on loop-invariant conditions
-  to have multiple loops.  For example, it turns the left into the right code:
-  </p>
-  
-  <pre
->for (...)                  if (lic)
-  A                          for (...)
-  if (lic)                     A; B; C
-    B                      else
-  C                          for (...)
-                               A; C</pre>
-  
-  <p>
-  This can increase the size of the code exponentially (doubling it every time
-  a loop is unswitched) so we only unswitch if the resultant code will be
-  smaller than a threshold.
-  </p>
-  
-  <p>
-  This pass expects LICM to be run before it to hoist invariant conditions out
-  of the loop, to make the unswitching opportunity obvious.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="loop-simplify">-loop-simplify: Canonicalize natural loops</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass performs several transformations to transform natural loops into a
   simpler form, which makes subsequent analyses and transformations simpler and
@@ -1531,26 +1376,52 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="lowerallocs">-lowerallocs: Lower allocations from instructions to calls</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="loop-unroll">-loop-unroll: Unroll loops</a>
+</h3>
+<div>
   <p>
-  Turn <tt>malloc</tt> and <tt>free</tt> instructions into <tt>@malloc</tt> and
-  <tt>@free</tt> calls.
+  This pass implements a simple loop unroller.  It works best when loops have
+  been canonicalized by the <a href="#indvars"><tt>-indvars</tt></a> pass,
+  allowing it to determine the trip counts of loops easily.
   </p>
+</div>
 
+<!-------------------------------------------------------------------------- -->
+<h3>
+  <a name="loop-unswitch">-loop-unswitch: Unswitch loops</a>
+</h3>
+<div>
+  <p>
+  This pass transforms loops that contain branches on loop-invariant conditions
+  to have multiple loops.  For example, it turns the left into the right code:
+  </p>
+  
+  <pre
+>for (...)                  if (lic)
+  A                          for (...)
+  if (lic)                     A; B; C
+    B                      else
+  C                          for (...)
+                               A; C</pre>
+  
   <p>
-  This is a target-dependent tranformation because it depends on the size of
-  data types and alignment constraints.
+  This can increase the size of the code exponentially (doubling it every time
+  a loop is unswitched) so we only unswitch if the resultant code will be
+  smaller than a threshold.
+  </p>
+  
+  <p>
+  This pass expects LICM to be run before it to hoist invariant conditions out
+  of the loop, to make the unswitching opportunity obvious.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="loweratomic">-loweratomic: Lower atomic intrinsics</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="loweratomic">-loweratomic: Lower atomic intrinsics to non-atomic form</a>
+</h3>
+<div>
   <p>
   This pass lowers atomic intrinsics to non-atomic form for use in a known
   non-preemptible environment.
@@ -1565,10 +1436,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lowerinvoke">-lowerinvoke: Lower invoke and unwind, for unwindless code generators</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This transformation is designed for use by code generators which do not yet
   support stack unwinding.  This pass supports two models of exception handling
@@ -1606,10 +1477,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lowersetjmp">-lowersetjmp: Lower Set Jump</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
    Lowers <tt>setjmp</tt> and <tt>longjmp</tt> to use the LLVM invoke and unwind
    instructions as necessary.
@@ -1635,10 +1506,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="lowerswitch">-lowerswitch: Lower SwitchInst's to branches</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Rewrites <tt>switch</tt> instructions with a sequence of branches, which
   allows targets to get away with not implementing the switch instruction until
@@ -1647,10 +1518,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="mem2reg">-mem2reg: Promote Memory to Register</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This file promotes memory references to be register references.  It promotes
   <tt>alloca</tt> instructions which only have <tt>load</tt>s and
@@ -1663,10 +1534,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="memcpyopt">-memcpyopt: Optimize use of memcpy and friend</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="memcpyopt">-memcpyopt: MemCpy Optimization</a>
+</h3>
+<div>
   <p>
   This pass performs various transformations related to eliminating memcpy
   calls, or transforming sets of stores into memset's.
@@ -1674,10 +1545,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="mergefunc">-mergefunc: Merge Functions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass looks for equivalent functions that are mergable and folds them.
  
   A hash is computed from the function, based on its type and number of
@@ -1695,10 +1566,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="mergereturn">-mergereturn: Unify function exit nodes</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Ensure that functions have at most one <tt>ret</tt> instruction in them.
   Additionally, it keeps track of which node is the new exit node of the CFG.
@@ -1706,36 +1577,20 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="partial-inliner">-partial-inliner: Partial Inliner</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass performs partial inlining, typically by inlining an if 
   statement that surrounds the body of the function.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="partialspecialization">-partialspecialization: Partial Specialization</a>
-</div>
-<div class="doc_text">
-  <p>This pass finds function arguments that are often a common constant and 
-  specializes a version of the called function for that constant.
- 
-  This pass simply does the cloning for functions it specializes.  It depends
-  on <a href="#ipsccp">IPSCCP</a> and <a href="#deadargelim">DAE</a> to clean up the results.
- 
-  The initial heuristic favors constant arguments that are used in control 
-  flow.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="prune-eh">-prune-eh: Remove unused exception handling info</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This file implements a simple interprocedural pass which walks the call-graph,
   turning <tt>invoke</tt> instructions into <tt>call</tt> instructions if and
@@ -1745,10 +1600,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="reassociate">-reassociate: Reassociate expressions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass reassociates commutative expressions in an order that is designed
   to promote better constant propagation, GCSE, LICM, PRE, etc.
@@ -1768,10 +1623,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="reg2mem">-reg2mem: Demote all values to stack slots</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This file demotes all registers to memory references.  It is intented to be
   the inverse of <a href="#mem2reg"><tt>-mem2reg</tt></a>.  By converting to
@@ -1785,10 +1640,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="scalarrepl">-scalarrepl: Scalar Replacement of Aggregates</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="scalarrepl">-scalarrepl: Scalar Replacement of Aggregates (DT)</a>
+</h3>
+<div>
   <p>
   The well-known scalar replacement of aggregates transformation.  This
   transform breaks up <tt>alloca</tt> instructions of aggregate type (structure
@@ -1807,10 +1662,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="sccp">-sccp: Sparse Conditional Constant Propagation</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Sparse conditional constant propagation and merging, which can be summarized
   as:
@@ -1830,20 +1685,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="sink">-sink: Code Sinking</a>
-</div>
-<div class="doc_text">
-  <p>This pass moves instructions into successor blocks, when possible, so that
- they aren't executed on paths where their results aren't needed.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="simplify-libcalls">-simplify-libcalls: Simplify well-known library calls</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Applies a variety of small optimizations for calls to specific well-known 
   function calls (e.g. runtime library functions). For example, a call
@@ -1853,20 +1698,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="simplify-libcalls-halfpowr">-simplify-libcalls-halfpowr: Simplify half_powr library calls</a>
-</div>
-<div class="doc_text">
-  <p>Simple pass that applies an experimental transformation on calls 
-  to specific functions.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="simplifycfg">-simplifycfg: Simplify the CFG</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Performs dead code elimination and basic block merging. Specifically:
   </p>
@@ -1882,47 +1717,43 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="split-geps">-split-geps: Split complex GEPs into simple GEPs</a>
-</div>
-<div class="doc_text">
-  <p>This function breaks GEPs with more than 2 non-zero operands into smaller
-  GEPs each with no more than 2 non-zero operands. This exposes redundancy
-  between GEPs with common initial operand sequences.
+<h3>
+  <a name="sink">-sink: Code sinking</a>
+</h3>
+<div>
+  <p>This pass moves instructions into successor blocks, when possible, so that
+ they aren't executed on paths where their results aren't needed.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="ssi">-ssi: Static Single Information Construction</a>
-</div>
-<div class="doc_text">
-  <p>This pass converts a list of variables to the Static Single Information
-  form. 
- 
-  We are building an on-demand representation, that is, we do not convert
-  every single variable in the target function to SSI form. Rather, we receive
-  a list of target variables that must be converted. We also do not
-  completely convert a target variable to the SSI format. Instead, we only
-  change the variable in the points where new information can be attached
-  to its live range, that is, at branch points.
+<h3>
+  <a name="sretpromotion">-sretpromotion: Promote sret arguments to multiple ret values</a>
+</h3>
+<div>
+  <p>
+  This pass finds functions that return a struct (using a pointer to the struct
+  as the first argument of the function, marked with the '<tt>sret</tt>' attribute) and
+  replaces them with a new function that simply returns each of the elements of
+  that struct (using multiple return values).
   </p>
-</div>
 
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="ssi-everything">-ssi-everything: Static Single Information Construction (everything, intended for debugging)</a>
-</div>
-<div class="doc_text">
-  <p>A pass that runs <a href="#ssi">SSI</a> on every non-void variable, intended for debugging.
+  <p>
+  This pass works under a number of conditions:
   </p>
+
+  <ul>
+  <li>The returned struct must not contain other structs</li>
+  <li>The returned struct must only be used to load values from</li>
+  <li>The placeholder struct passed in is the result of an <tt>alloca</tt></li>
+  </ul>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="strip">-strip: Strip all symbols from a module</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   performs code stripping. this transformation can delete:
   </p>
@@ -1941,10 +1772,32 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="strip-dead-prototypes">-strip-dead-prototypes: Remove unused function declarations</a>
+<h3>
+  <a name="strip-dead-debug-info">-strip-dead-debug-info: Strip debug info for unused symbols</a>
+</h3>
+<div>
+  <p>
+  performs code stripping. this transformation can delete:
+  </p>
+  
+  <ol>
+    <li>names for virtual registers</li>
+    <li>symbols for internal globals and functions</li>
+    <li>debug information</li>
+  </ol>
+  
+  <p>
+  note that this transformation makes code much less readable, so it should
+  only be used in situations where the <tt>strip</tt> utility would be used,
+  such as reducing code size or making it harder to reverse engineer code.
+  </p>
 </div>
-<div class="doc_text">
+
+<!-------------------------------------------------------------------------- -->
+<h3>
+  <a name="strip-dead-prototypes">-strip-dead-prototypes: Strip Unused Function Prototypes</a>
+</h3>
+<div>
   <p>
   This pass loops over all of the functions in the input module, looking for
   dead declarations and removes them. Dead declarations are declarations of
@@ -1954,10 +1807,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="strip-debug-declare">-strip-debug-declare: Strip all llvm.dbg.declare intrinsics</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass implements code stripping. Specifically, it can delete:</p>
   <ul>
   <li>names for virtual registers</li>
@@ -1972,10 +1825,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="strip-nondebug">-strip-nondebug: Strip all symbols, except dbg symbols, from a module</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This pass implements code stripping. Specifically, it can delete:</p>
   <ul>
   <li>names for virtual registers</li>
@@ -1990,33 +1843,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="sretpromotion">-sretpromotion: Promote sret arguments</a>
-</div>
-<div class="doc_text">
-  <p>
-  This pass finds functions that return a struct (using a pointer to the struct
-  as the first argument of the function, marked with the '<tt>sret</tt>' attribute) and
-  replaces them with a new function that simply returns each of the elements of
-  that struct (using multiple return values).
-  </p>
-
-  <p>
-  This pass works under a number of conditions:
-  </p>
-
-  <ul>
-  <li>The returned struct must not contain other structs</li>
-  <li>The returned struct must only be used to load values from</li>
-  <li>The placeholder struct passed in is the result of an <tt>alloca</tt></li>
-  </ul>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="tailcallelim">-tailcallelim: Tail Call Elimination</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This file transforms calls of the current function (self recursion) followed
   by a return instruction with a branch to the entry of the function, creating
@@ -2045,10 +1875,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="tailduplicate">-tailduplicate: Tail Duplication</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass performs a limited form of tail duplication, intended to simplify
   CFGs by removing some unconditional branches.  This pass is necessary to
@@ -2058,17 +1888,18 @@ if (X &lt; 3) {</pre>
   </p>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_section"> <a name="transform">Utility Passes</a></div>
-<div class="doc_text">
+<h2><a name="utilities">Utility Passes</a></h2>
+<div>
   <p>This section describes the LLVM Utility Passes.</p>
-</div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="deadarghaX0r">-deadarghaX0r: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Same as dead argument elimination, but deletes arguments to functions which
   are external.  This is only for use by <a
@@ -2076,20 +1907,20 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="extract-blocks">-extract-blocks: Extract Basic Blocks From Module (for bugpoint use)</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   This pass is used by bugpoint to extract all blocks from the module into their
   own functions.</p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="instnamer">-instnamer: Assign names to anonymous instructions</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>This is a little utility pass that gives instructions names, this is mostly
  useful when diffing the effect of an optimization because deleting an
  unnamed instruction can change all other instruction numbering, making the
@@ -2098,10 +1929,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="preverify">-preverify: Preliminary module verification</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Ensures that the module is in the form required by the <a
   href="#verifier">Module Verifier</a> pass.
@@ -2114,10 +1945,10 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="verify">-verify: Module Verifier</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Verifies an LLVM IR code. This is useful to run after an optimization which is
   undergoing testing. Note that <tt>llvm-as</tt> verifies its input before
@@ -2165,20 +1996,20 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="view-cfg">-view-cfg: View CFG of function</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Displays the control flow graph using the GraphViz tool.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
+<h3>
   <a name="view-cfg-only">-view-cfg-only: View CFG of function (with no function bodies)</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
   <p>
   Displays the control flow graph using the GraphViz tool, but omitting function
   bodies.
@@ -2186,22 +2017,20 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="view-dom">-view-dom: View dominator tree of function</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="view-dom">-view-dom: View dominance tree of function</a>
+</h3>
+<div>
   <p>
   Displays the dominator tree using the GraphViz tool.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="view-dom-only">-view-dom-only: View dominator tree of function (with no function
-  bodies)
-  </a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="view-dom-only">-view-dom-only: View dominance tree of function (with no function bodies)</a>
+</h3>
+<div>
   <p>
   Displays the dominator tree using the GraphViz tool, but omitting function
   bodies.
@@ -2209,28 +2038,28 @@ if (X &lt; 3) {</pre>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="view-postdom">-view-postdom: View post dominator tree of function</a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="view-postdom">-view-postdom: View postdominance tree of function</a>
+</h3>
+<div>
   <p>
   Displays the post dominator tree using the GraphViz tool.
   </p>
 </div>
 
 <!-------------------------------------------------------------------------- -->
-<div class="doc_subsection">
-  <a name="view-postdom-only">-view-postdom-only: View post dominator tree of function (with no
-  function bodies)
-  </a>
-</div>
-<div class="doc_text">
+<h3>
+  <a name="view-postdom-only">-view-postdom-only: View postdominance tree of function (with no function bodies)</a>
+</h3>
+<div>
   <p>
   Displays the post dominator tree using the GraphViz tool, but omitting
   function bodies.
   </p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -2241,8 +2070,8 @@ if (X &lt; 3) {</pre>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-13 21:57:25 +0100 (Sun, 13 Feb 2011) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 0351dd0..6af922b 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   LLVM Programmer's Manual
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction</a></li>
@@ -56,6 +56,7 @@ option</a></li>
     <ul>
     <li><a href="#ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
     <ul>
+      <li><a href="#dss_arrayref">llvm/ADT/ArrayRef.h</a></li>
       <li><a href="#dss_fixedarrays">Fixed Size Arrays</a></li>
       <li><a href="#dss_heaparrays">Heap Allocated Arrays</a></li>
       <li><a href="#dss_smallvector">"llvm/ADT/SmallVector.h"</a></li>
@@ -86,6 +87,7 @@ option</a></li>
       <li><a href="#dss_valuemap">"llvm/ADT/ValueMap.h"</a></li>
       <li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
       <li><a href="#dss_map">&lt;map&gt;</a></li>
+      <li><a href="#dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a></li>
       <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
     </ul></li>
     <li><a href="#ds_string">String-like containers</a>
@@ -208,12 +210,12 @@ with another <tt>Value</tt></a> </li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction </a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is meant to highlight some of the important classes and
 interfaces available in the LLVM source-base.  This manual is not
@@ -240,24 +242,22 @@ href="/doxygen/InstVisitor_8h-source.html">InstVisitor</a></tt> template.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="general">General Information</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section contains general information that is useful if you are working
 in the LLVM source-base, but that isn't specific to any particular API.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="stl">The C++ Standard Template Library</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM makes heavy use of the C++ Standard Template Library (STL),
 perhaps much more than you are used to, or have seen before.  Because of
@@ -303,11 +303,11 @@ to write maintainable code more than where to put your curly braces.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="stl">Other useful references</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ol>
 <li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
@@ -316,26 +316,26 @@ static and shared libraries across platforms</a></li>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="apis">Important and useful LLVM APIs</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Here we highlight some LLVM APIs that are generally useful and good to
 know about when writing transformations.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt> and
   <tt>dyn_cast&lt;&gt;</tt> templates</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM source-base makes extensive use of a custom form of RTTI.
 These templates have many similarities to the C++ <tt>dynamic_cast&lt;&gt;</tt>
@@ -440,12 +440,12 @@ are lots of examples in the LLVM source base.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="string_apis">Passing strings (the <tt>StringRef</tt>
 and <tt>Twine</tt> classes)</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Although LLVM generally does not do much string manipulation, we do have
 several important APIs which take strings.  Two important examples are the
@@ -459,14 +459,12 @@ clients to perform a heap allocation which is usually unnecessary.  Instead,
 many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
 passing strings efficiently.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="StringRef">The <tt>StringRef</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>StringRef</tt> data type represents a reference to a constant string
 (a character array and a length) and supports the common operations available
@@ -502,11 +500,11 @@ small and pervasive enough in LLVM that it should always be passed by value.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="Twine">The <tt>Twine</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>Twine</tt> class is an efficient way for APIs to accept concatenated
 strings.  For example, a common LLVM paradigm is to name one instruction based on
@@ -537,13 +535,14 @@ accept concatenated strings.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Often when working on your pass you will put a bunch of debugging printouts
 and other code into your pass.  After you get it working, you want to remove
@@ -589,15 +588,13 @@ enable or disable it directly in gdb.  Just use "<tt>set DebugFlag=0</tt>" or
 program hasn't been started yet, you can always just run it with
 <tt>-debug</tt>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt> and
   the <tt>-debug-only</tt> option</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Sometimes you may find yourself in a situation where enabling <tt>-debug</tt>
 just turns on <b>too much</b> information (such as when working on the code
@@ -665,13 +662,15 @@ DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type (2)\n");
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
   option</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The "<tt><a
 href="/doxygen/Statistic_8h-source.html">llvm/ADT/Statistic.h</a></tt>" file
@@ -766,11 +765,11 @@ maintainable and useful.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ViewGraph">Viewing graphs while debugging code</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Several of the important data structures in LLVM are graphs: for example
 CFGs made out of LLVM <a href="#BasicBlock">BasicBlock</a>s, CFGs made out of
@@ -814,13 +813,15 @@ attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="datastructure">Picking the Right Data Structure for a Task</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM has a plethora of data structures in the <tt>llvm/ADT/</tt> directory,
  and we commonly use STL data structures.  This section describes the trade-offs
@@ -876,35 +877,47 @@ elements (but could contain many), for example, it's much better to use
 .  Doing so avoids (relatively) expensive malloc/free calls, which dwarf the
 cost of adding the elements to the container. </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 There are a variety of sequential containers available for you, based on your
 needs.  Pick the first in this section that will do what you want.
+
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="dss_arrayref">llvm/ADT/ArrayRef.h</a>
+</h4>
+
+<div>
+<p>The llvm::ArrayRef class is the preferred class to use in an interface that
+   accepts a sequential list of elements in memory and just reads from them.  By
+   taking an ArrayRef, the API can be passed a fixed size array, an std::vector,
+   an llvm::SmallVector and anything else that is contiguous in memory.
+</p>
 </div>
 
+
+  
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_fixedarrays">Fixed Size Arrays</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>Fixed size arrays are very simple and very fast.  They are good if you know
 exactly how many elements you have, or you have a (low) upper bound on how many
 you have.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_heaparrays">Heap Allocated Arrays</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>Heap allocated arrays (new[] + delete[]) are also simple.  They are good if
 the number of elements is variable, if you know how many elements you will need
 before the array is allocated, and if the array is usually large (if not,
@@ -916,11 +929,11 @@ construct those elements actually used).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_smallvector">"llvm/ADT/SmallVector.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>SmallVector&lt;Type, N&gt;</tt> is a simple class that looks and smells
 just like <tt>vector&lt;Type&gt;</tt>:
 it supports efficient iteration, lays out elements in memory order (so you can
@@ -945,11 +958,11 @@ SmallVectors are most useful when on the stack.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_vector">&lt;vector&gt;</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 std::vector is well loved and respected.  It is useful when SmallVector isn't:
 when the size of the vector is often large (thus the small optimization will
@@ -987,11 +1000,11 @@ the loop.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_deque">&lt;deque&gt;</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>std::deque is, in some senses, a generalized version of std::vector.  Like
 std::vector, it provides constant time random access and other similar
 properties, but it also provides efficient access to the front of the list.  It
@@ -1003,11 +1016,11 @@ something cheaper.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_list">&lt;list&gt;</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>std::list is an extremely inefficient class that is rarely useful.
 It performs a heap allocation for every element inserted into it, thus having an
 extremely high constant factor, particularly for small data types.  std::list
@@ -1021,11 +1034,11 @@ not invalidate iterator or pointers to other elements in the list.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_ilist">llvm/ADT/ilist.h</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>ilist&lt;T&gt;</tt> implements an 'intrusive' doubly-linked list.  It is
 intrusive, because it requires the element to store and provide access to the
 prev/next pointers for the list.</p>
@@ -1051,22 +1064,22 @@ Related classes of interest are explained in the following subsections:
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_ilist_traits">ilist_traits</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>ilist_traits&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s customization
 mechanism. <tt>iplist&lt;T&gt;</tt> (and consequently <tt>ilist&lt;T&gt;</tt>)
 publicly derive from this traits class.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_iplist">iplist</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>iplist&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s base and as such
 supports a slightly narrower interface. Notably, inserters from
 <tt>T&amp;</tt> are absent.</p>
@@ -1076,11 +1089,11 @@ used for a wide variety of customizations.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_ilist_node">llvm/ADT/ilist_node.h</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>ilist_node&lt;T&gt;</tt> implements a the forward and backward links
 that are expected by the <tt>ilist&lt;T&gt;</tt> (and analogous containers)
 in the default manner.</p>
@@ -1091,11 +1104,11 @@ in the default manner.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_ilist_sentinel">Sentinels</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p><tt>ilist</tt>s have another specialty that must be considered. To be a good
 citizen in the C++ ecosystem, it needs to support the standard container
 operations, such as <tt>begin</tt> and <tt>end</tt> iterators, etc. Also, the
@@ -1129,11 +1142,11 @@ field in the ghostly sentinel which can be legally accessed.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_other">Other Sequential Container options</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>Other STL containers are available, such as std::string.</p>
 
 <p>There are also various STL adapter classes such as std::queue,
@@ -1142,27 +1155,25 @@ underlying container but don't affect the cost of the container itself.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Set-like containers are useful when you need to canonicalize multiple values
 into a single representation.  There are several different choices for how to do
 this, providing various trade-offs.</p>
 
-</div>
-
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_sortedvectorset">A sorted 'vector'</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>If you intend to insert a lot of elements, then do a lot of queries, a
 great approach is to use a vector (or other sequential container) with
@@ -1180,11 +1191,11 @@ efficiently queried with a standard binary or radix search.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_smallset">"llvm/ADT/SmallSet.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>If you have a set-like data structure that is usually small and whose elements
 are reasonably small, a <tt>SmallSet&lt;Type, N&gt;</tt> is a good choice.  This set
@@ -1203,11 +1214,11 @@ and erasing, but does not support iteration.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>SmallPtrSet has all the advantages of <tt>SmallSet</tt> (and a <tt>SmallSet</tt> of pointers is 
 transparently implemented with a <tt>SmallPtrSet</tt>), but also supports iterators.  If
@@ -1223,11 +1234,11 @@ visited in sorted order.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_denseset">"llvm/ADT/DenseSet.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 DenseSet is a simple quadratically probed hash table.  It excels at supporting
@@ -1242,11 +1253,11 @@ href="#dss_densemap">DenseMap</a> has.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 FoldingSet is an aggregate class that is really good at uniquing
@@ -1279,11 +1290,11 @@ elements.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_set">&lt;set&gt;</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><tt>std::set</tt> is a reasonable all-around set class, which is decent at
 many things but great at nothing.  std::set allocates memory for each element
@@ -1304,11 +1315,11 @@ std::set is almost never a good choice.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_setvector">"llvm/ADT/SetVector.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>LLVM's SetVector&lt;Type&gt; is an adapter class that combines your choice of
 a set-like container along with a <a href="#ds_sequential">Sequential 
 Container</a>.  The important property
@@ -1344,11 +1355,11 @@ heap traffic.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_uniquevector">"llvm/ADT/UniqueVector.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 UniqueVector is similar to <a href="#dss_setvector">SetVector</a>, but it
@@ -1364,11 +1375,11 @@ factors, and produces a lot of malloc traffic.  It should be avoided.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_otherset">Other Set-Like Container Options</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 The STL provides several other options, such as std::multiset and the various 
@@ -1384,22 +1395,23 @@ better.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 Map-like containers are useful when you want to associate data to a key.  As
 usual, there are a lot of different ways to do this. :)
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_sortedvectormap">A sorted 'vector'</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 If your usage pattern follows a strict insert-then-query approach, you can
@@ -1412,11 +1424,11 @@ vectors for sets.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_stringmap">"llvm/ADT/StringMap.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 Strings are commonly used as keys in maps, and they are difficult to support
@@ -1446,11 +1458,11 @@ copies a string if a value is inserted into the table.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_indexedmap">"llvm/ADT/IndexedMap.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 IndexedMap is a specialized container for mapping small dense integers (or
 values that can be mapped to small dense integers) to some other type.  It is
@@ -1466,11 +1478,11 @@ virtual register ID).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_densemap">"llvm/ADT/DenseMap.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 DenseMap is a simple quadratically probed hash table.  It excels at supporting
@@ -1492,11 +1504,11 @@ inserted into the map) that it needs internally.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_valuemap">"llvm/ADT/ValueMap.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 ValueMap is a wrapper around a <a href="#dss_densemap">DenseMap</a> mapping
@@ -1509,11 +1521,11 @@ a <code>Config</code> parameter to the ValueMap template.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_intervalmap">"llvm/ADT/IntervalMap.h"</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p> IntervalMap is a compact map for small keys and values. It maps key
 intervals instead of single keys, and it will automatically coalesce adjacent
@@ -1526,11 +1538,11 @@ as STL iterators. The heavyweight iterators allow a smaller data structure.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_map">&lt;map&gt;</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 std::map has similar characteristics to <a href="#dss_set">std::set</a>: it uses
@@ -1546,11 +1558,31 @@ another element takes place).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="dss_othermap">Other Map-Like Container Options</a>
+<h4>
+  <a name="dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a>
+</h4>
+
+<div>
+
+<p>IntEqClasses provides a compact representation of equivalence classes of
+small integers. Initially, each integer in the range 0..n-1 has its own
+equivalence class. Classes can be joined by passing two class representatives to
+the join(a, b) method. Two integers are in the same class when findLeader()
+returns the same representative.</p>
+
+<p>Once all equivalence classes are formed, the map can be compressed so each
+integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
+is the total number of equivalence classes. The map must be uncompressed before
+it can be edited again.</p>
+
 </div>
 
-<div class="doc_text">
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="dss_othermap">Other Map-Like Container Options</a>
+</h4>
+
+<div>
 
 <p>
 The STL provides several other options, such as std::multimap and the various 
@@ -1564,12 +1596,14 @@ always better.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ds_string">String-like containers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 TODO: const char* vs stringref vs smallstring vs std::string.  Describe twine,
@@ -1579,11 +1613,11 @@ xref to #string_apis.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ds_bit">Bit storage containers (BitVector, SparseBitVector)</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>Unlike the other containers, there are only two bit storage containers, and 
 choosing when to use each is relatively straightforward.</p>
 
@@ -1593,14 +1627,13 @@ implementation in many common compilers (e.g. commonly available versions of
 GCC) is extremely inefficient and 2) the C++ standards committee is likely to
 deprecate this container and/or change it significantly somehow.  In any case,
 please don't use it.</p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_bitvector">BitVector</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p> The BitVector container provides a dynamic size set of bits for manipulation.
 It supports individual bit setting/testing, as well as set operations.  The set
 operations take time O(size of bitvector), but operations are performed one word
@@ -1611,11 +1644,11 @@ the number of set bits to be high (IE a dense set).
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_smallbitvector">SmallBitVector</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p> The SmallBitVector container provides the same interface as BitVector, but
 it is optimized for the case where only a small number of bits, less than
 25 or so, are needed. It also transparently supports larger bit counts, but
@@ -1630,11 +1663,11 @@ and its operator[] does not provide an assignable lvalue.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="dss_sparsebitvector">SparseBitVector</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p> The SparseBitVector container is much like BitVector, with one major
 difference: Only the bits that are set, are stored.  This makes the
 SparseBitVector much more space efficient than BitVector when the set is sparse,
@@ -1644,13 +1677,17 @@ universe).  The downside to the SparseBitVector is that setting and testing of r
 </p>
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="common">Helpful Hints for Common Operations</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section describes how to perform some very simple transformations of
 LLVM code.  This is meant to give examples of common idioms used, showing the
@@ -1659,15 +1696,13 @@ you should also read about the main classes that you will be working with.  The
 <a href="#coreclasses">Core LLVM Class Hierarchy Reference</a> contains details
 and descriptions of the main classes that you should know about.</p>
 
-</div>
-
 <!-- NOTE: this section should be heavy on example code -->
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="inspection">Basic Inspection and Traversal Routines</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM compiler infrastructure have many different data structures that may
 be traversed.  Following the example of the C++ standard template library, the
@@ -1684,16 +1719,14 @@ on them, and it is easier to remember how to iterate. First we show a few common
 examples of the data structures that need to be traversed.  Other data
 structures are traversed in very similar ways.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_function">Iterating over the </a><a
   href="#BasicBlock"><tt>BasicBlock</tt></a>s in a <a
   href="#Function"><tt>Function</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>It's quite common to have a <tt>Function</tt> instance that you'd like to
 transform in some way; in particular, you'd like to manipulate its
@@ -1722,13 +1755,13 @@ exactly equivalent to <tt>(*i).size()</tt> just like you'd expect.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_basicblock">Iterating over the </a><a
   href="#Instruction"><tt>Instruction</tt></a>s in a <a
   href="#BasicBlock"><tt>BasicBlock</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Just like when dealing with <tt>BasicBlock</tt>s in <tt>Function</tt>s, it's
 easy to iterate over the individual instructions that make up
@@ -1753,13 +1786,13 @@ basic block itself: <tt>errs() &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_institer">Iterating over the </a><a
   href="#Instruction"><tt>Instruction</tt></a>s in a <a
   href="#Function"><tt>Function</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>If you're finding that you commonly iterate over a <tt>Function</tt>'s
 <tt>BasicBlock</tt>s and then that <tt>BasicBlock</tt>'s <tt>Instruction</tt>s,
@@ -1799,12 +1832,12 @@ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_convert">Turning an iterator into a class pointer (and
   vice-versa)</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Sometimes, it'll be useful to grab a reference (or pointer) to a class
 instance when all you've got at hand is an iterator.  Well, extracting
@@ -1876,12 +1909,12 @@ and <tt>operator*</tt> changed to return a pointer instead of a reference.</p>
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_complex">Finding call sites: a slightly more complex
   example</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Say that you're writing a FunctionPass and would like to count all the
 locations in the entire module (that is, across every <tt>Function</tt>) where a
@@ -1938,11 +1971,11 @@ class OurFunctionPass : public FunctionPass {
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="calls_and_invokes">Treating calls and invokes the same way</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>You may have noticed that the previous example was a bit oversimplified in
 that it did not deal with call sites generated by 'invoke' instructions. In
@@ -1965,11 +1998,11 @@ If you look at its definition, it has only a single pointer member.</p>
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_chains">Iterating over def-use &amp; use-def chains</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Frequently, we might have an instance of the <a
 href="/doxygen/classllvm_1_1Value.html">Value Class</a> and we want to
@@ -2026,12 +2059,12 @@ calling <tt>use/op_begin()</tt> on <tt>const Value*</tt>s or
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="iterate_preds">Iterating over predecessors &amp;
 successors of blocks</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Iterating over the predecessors and successors of a block is quite easy
 with the routines defined in <tt>"llvm/Support/CFG.h"</tt>.  Just use code like
@@ -2054,13 +2087,14 @@ succ_iterator/succ_begin/succ_end.</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="simplechanges">Making simple changes</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>There are some primitive transformation operations present in the LLVM
 infrastructure that are worth knowing about.  When performing
@@ -2068,15 +2102,13 @@ transformations, it's fairly common to manipulate the contents of basic
 blocks. This section describes some of the common methods for doing so
 and gives example code.</p>
 
-</div>
-
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="schanges_creating">Creating and inserting new
   <tt>Instruction</tt>s</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><i>Instantiating Instructions</i></p>
 
@@ -2212,18 +2244,15 @@ Instruction* newInst = new Instruction(..., pi);
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Deleting an instruction from an existing sequence of instructions that form a
-<a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward. First,
-you must have a pointer to the instruction that you wish to delete.  Second, you
-need to obtain the pointer to that instruction's basic block. You use the
-pointer to the basic block to get its list of instructions and then use the
-erase function to remove your instruction. For example:</p>
+<a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward: just
+call the instruction's eraseFromParent() method.  For example:</p>
 
 <div class="doc_code">
 <pre>
@@ -2232,15 +2261,19 @@ I-&gt;eraseFromParent();
 </pre>
 </div>
 
+<p>This unlinks the instruction from its containing basic block and deletes 
+it.  If you'd just like to unlink the instruction from its containing basic
+block but not delete it, you can use the <tt>removeFromParent()</tt> method.</p>
+
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="schanges_replacing">Replacing an <tt>Instruction</tt> with another
   <tt>Value</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p><i>Replacing individual instructions</i></p>
 
@@ -2248,7 +2281,7 @@ I-&gt;eraseFromParent();
 permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
 and <tt>ReplaceInstWithInst</tt>.</p>
 
-<h4><a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a></h4>
+<h5><a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a></h5>
 
 <ul>
   <li><tt>ReplaceInstWithValue</tt>
@@ -2301,11 +2334,11 @@ ReplaceInstWithValue, ReplaceInstWithInst -->
 </div>
 
 <!--_______________________________________________________________________-->
-<div class="doc_subsubsection">
+<h4>
   <a name="schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Deleting a global variable from a module is just as easy as deleting an 
 Instruction. First, you must have a pointer to the global variable that you wish
@@ -2322,12 +2355,14 @@ GV-&gt;eraseFromParent();
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="create_types">How to Create Types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>In generating IR, you may need some complex types.  If you know these types
 statically, you can use <tt>TypeBuilder&lt;...&gt;::get()</tt>, defined
@@ -2362,13 +2397,15 @@ comment</a> for more details.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="threading">Threads and LLVM</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
 This section describes the interaction of the LLVM APIs with multithreading,
 both on the part of client applications, and in the JIT, in the hosted
@@ -2391,14 +2428,13 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
 using the resultant compiler to build a copy of LLVM with multithreading
 support.
 </p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="startmultithreaded">Entering and Exiting Multithreaded Mode</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 In order to properly protect its internal data structures while avoiding 
@@ -2431,11 +2467,11 @@ result in concurrent LLVM API calls.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="shutdown">Ending Execution with <tt>llvm_shutdown()</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 When you are done using the LLVM APIs, you should call <tt>llvm_shutdown()</tt>
 to deallocate memory used for internal structures.  This will also invoke 
@@ -2451,11 +2487,11 @@ destructor.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="managedstatic">Lazy Initialization with <tt>ManagedStatic</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <tt>ManagedStatic</tt> is a utility class in LLVM used to implement static
 initialization of static resources, such as the global type tables.  Before the
@@ -2480,11 +2516,11 @@ and only if you know what you're doing!
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <tt>LLVMContext</tt> is an opaque class in the LLVM API which clients can use
 to operate multiple, isolated instances of LLVM concurrently within the same
@@ -2524,11 +2560,11 @@ isolation is not a concern.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="jitthreading">Threads and the JIT</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 LLVM's "eager" JIT compiler is safe to use in threaded programs.  Multiple
 threads can call <tt>ExecutionEngine::getPointerToFunction()</tt> or
@@ -2551,26 +2587,27 @@ access, but we suggest using only the eager JIT in threaded programs.
 </p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="advanced">Advanced Topics</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
 This section describes some of the advanced or obscure API's that most clients
 do not need to be aware of.  These API's tend manage the inner workings of the
 LLVM system, and only need to be accessed in unusual circumstances.
 </p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="TypeResolve">LLVM Type Resolution</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The LLVM type system has a very simple goal: allow clients to compare types for
@@ -2599,14 +2636,12 @@ Third, a concrete type is a type that is not an abstract type (e.g. "<tt>{ i32,
 float }</tt>").
 </p>
 
-</div>
-
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="BuildRecType">Basic Recursive Type Construction</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 Because the most common question is "how do I build a recursive type with LLVM",
@@ -2658,11 +2693,11 @@ href="#PATypeHolder">PATypeHolder class</a>.
 </div>
 
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="refineAbstractTypeTo">The <tt>refineAbstractTypeTo</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 The <tt>refineAbstractTypeTo</tt> method starts the type unification process.
 While this method is actually a member of the DerivedType class, it is most
@@ -2688,11 +2723,11 @@ complex datastructures.
 </div>
 
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="PATypeHolder">The PATypeHolder Class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 PATypeHolder is a form of a "smart pointer" for Type objects.  When VMCore
 happily goes about nuking types that become isomorphic to existing types, it
@@ -2710,11 +2745,11 @@ Type is maintained by PATypeHolder objects.
 </div>
 
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="AbstractTypeUser">The AbstractTypeUser Class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 Some data structures need more to perform more complex updates when types get
@@ -2728,14 +2763,15 @@ objects) can never be refined.
 </p>
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="SymbolTable">The <tt>ValueSymbolTable</tt> and
    <tt>TypeSymbolTable</tt> classes</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html">
 ValueSymbolTable</a></tt> class provides a symbol table that the <a
 href="#Function"><tt>Function</tt></a> and <a href="#Module">
@@ -2766,11 +2802,11 @@ insert entries into the symbol table.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1User.html">
 User</a></tt> class provides a basis for expressing the ownership of <tt>User</tt>
 towards other <tt><a href="http://llvm.org/doxygen/classllvm_1_1Value.html">
@@ -2779,18 +2815,19 @@ Use</a></tt> helper class is employed to do the bookkeeping and to facilitate <i
 addition and removal.</p>
 
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="Use2User">Interaction and relationship between <tt>User</tt> and <tt>Use</tt> objects</a>
-</div>
+<h4>
+  <a name="Use2User">
+    Interaction and relationship between <tt>User</tt> and <tt>Use</tt> objects
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 A subclass of <tt>User</tt> can choose between incorporating its <tt>Use</tt> objects
 or refer to them out-of-line by means of a pointer. A mixed variant
 (some <tt>Use</tt>s inline others hung off) is impractical and breaks the invariant
 that the <tt>Use</tt> objects belonging to the same <tt>User</tt> form a contiguous array.
 </p>
-</div>
 
 <p>
 We have 2 different layouts in the <tt>User</tt> (sub)classes:
@@ -2839,17 +2876,18 @@ enforce the following memory layouts:</p>
 <i>(In the above figures '<tt>P</tt>' stands for the <tt>Use**</tt> that
     is stored in each <tt>Use</tt> object in the member <tt>Use::Prev</tt>)</i>
 
+</div>
+
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="Waymarking">The waymarking algorithm</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 Since the <tt>Use</tt> objects are deprived of the direct (back)pointer to
 their <tt>User</tt> objects, there must be a fast and exact method to
 recover it. This is accomplished by the following scheme:</p>
-</div>
 
 A bit-encoding in the 2 LSBits (least significant bits) of the <tt>Use::Prev</tt> allows to find the
 start of the <tt>User</tt> object:
@@ -2880,15 +2918,16 @@ Only the significant number of bits need to be stored between the
 stops, so that the <i>worst case is 20 memory accesses</i> when there are
 1000 <tt>Use</tt> objects associated with a <tt>User</tt>.</p>
 
+</div>
+
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ReferenceImpl">Reference implementation</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 The following literate Haskell fragment demonstrates the concept:</p>
-</div>
 
 <div class="doc_code">
 <pre>
@@ -2970,10 +3009,14 @@ And here is the result of &lt;deepCheck identityProp&gt;:</p>
 OK, passed 500 tests.
 </pre>
 
+</div>
+
 <!-- ______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="Tagging">Tagging considerations</a>
-</div>
+</h4>
+
+<div>
 
 <p>
 To maintain the invariant that the 2 LSBits of each <tt>Use**</tt> in <tt>Use</tt>
@@ -2989,13 +3032,17 @@ the LSBit set. (Portability is relying on the fact that all known compilers plac
 
 </div>
 
-  <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
 </div>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h2>
+  <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p><tt>#include "<a href="/doxygen/Type_8h-source.html">llvm/Type.h</a>"</tt>
 <br>doxygen info: <a href="/doxygen/classllvm_1_1Type.html">Type Class</a></p>
 
@@ -3004,14 +3051,12 @@ being inspected or transformed.  The core LLVM classes are defined in
 header files in the <tt>include/llvm/</tt> directory, and implemented in
 the <tt>lib/VMCore</tt> directory.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Type">The <tt>Type</tt> class and Derived Types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
   <p><tt>Type</tt> is a superclass of all type classes. Every <tt>Value</tt> has
   a <tt>Type</tt>. <tt>Type</tt> cannot be instantiated directly but only
@@ -3026,14 +3071,13 @@ the <tt>lib/VMCore</tt> directory.</p>
   be performed with address equality of the Type Instance. That is, given two 
   <tt>Type*</tt> values, the types are identical if the pointers are identical.
   </p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="m_Type">Important Public Methods</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>bool isIntegerTy() const</tt>: Returns true for any integer type.</li>
@@ -3051,10 +3095,10 @@ the <tt>lib/VMCore</tt> directory.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="derivedtypes">Important Derived Types</a>
-</div>
-<div class="doc_text">
+</h4>
+<div>
 <dl>
   <dt><tt>IntegerType</tt></dt>
   <dd>Subclass of DerivedType that represents integer types of any bit width. 
@@ -3116,14 +3160,14 @@ the <tt>lib/VMCore</tt> directory.</p>
 </dl>
 </div>
 
-
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Module">The <tt>Module</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a
 href="/doxygen/Module_8h-source.html">llvm/Module.h</a>"</tt><br> doxygen info:
@@ -3138,14 +3182,12 @@ href="#GlobalVariable"><tt>GlobalVariable</tt></a>s, and a <a
 href="#SymbolTable"><tt>SymbolTable</tt></a>.  Additionally, it contains a few
 helpful member functions that try to make common operations easy.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="m_Module">Important Public Members of the <tt>Module</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>Module::Module(std::string name = "")</tt></li>
@@ -3244,13 +3286,14 @@ provide a name for it (probably based on the name of the translation unit).</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Value">The <tt>Value</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a href="/doxygen/Value_8h-source.html">llvm/Value.h</a>"</tt>
 <br> 
@@ -3301,14 +3344,12 @@ the class that
 represents this value.  Although this may take some getting used to, it
 simplifies the representation and makes it easier to manipulate.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="m_Value">Important Public Members of the <tt>Value</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>Value::use_iterator</tt> - Typedef for iterator over the
@@ -3355,12 +3396,14 @@ Inst-&gt;replaceAllUsesWith(ConstVal);
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="User">The <tt>User</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
   
 <p>
 <tt>#include "<a href="/doxygen/User_8h-source.html">llvm/User.h</a>"</tt><br>
@@ -3379,14 +3422,12 @@ Single Assignment (SSA) form, there can only be one definition referred to,
 allowing this direct connection.  This connection provides the use-def
 information in LLVM.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="m_User">Important Public Members of the <tt>User</tt> class</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>User</tt> class exposes the operand list in two ways: through
 an index access interface and through an iterator based interface.</p>
@@ -3409,12 +3450,14 @@ the operands of a <tt>User</tt>.</p></li>
 
 </div>    
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Instruction">The <tt>Instruction</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "</tt><tt><a
 href="/doxygen/Instruction_8h-source.html">llvm/Instruction.h</a>"</tt><br>
@@ -3445,14 +3488,13 @@ href="#CmpInst">CmpInst</a></tt>).  Unfortunately, the use of macros in
 this file confuses doxygen, so these enum values don't show up correctly in the
 <a href="/doxygen/classllvm_1_1Instruction.html">doxygen output</a>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="s_Instruction">Important Subclasses of the <tt>Instruction</tt>
-  class</a>
-</div>
-<div class="doc_text">
+<h4>
+  <a name="s_Instruction">
+    Important Subclasses of the <tt>Instruction</tt> class
+  </a>
+</h4>
+<div>
   <ul>
     <li><tt><a name="BinaryOperator">BinaryOperator</a></tt>
     <p>This subclasses represents all two operand instructions whose operands
@@ -3471,12 +3513,13 @@ this file confuses doxygen, so these enum values don't show up correctly in the
   </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="m_Instruction">Important Public Members of the <tt>Instruction</tt>
-  class</a>
-</div>
+<h4>
+  <a name="m_Instruction">
+    Important Public Members of the <tt>Instruction</tt> class
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt><a href="#BasicBlock">BasicBlock</a> *getParent()</tt>
@@ -3496,12 +3539,14 @@ and it has no name</p></li>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Constant">The <tt>Constant</tt> class and subclasses</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Constant represents a base class for different types of constants. It
 is subclassed by ConstantInt, ConstantArray, etc. for representing 
@@ -3509,11 +3554,9 @@ the various types of Constants.  <a href="#GlobalValue">GlobalValue</a> is also
 a subclass, which represents the address of a global variable or function.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">Important Subclasses of Constant </div>
-<div class="doc_text">
+<h4>Important Subclasses of Constant</h4>
+<div>
 <ul>
   <li>ConstantInt : This subclass of Constant represents an integer constant of
   any width.
@@ -3561,13 +3604,14 @@ a subclass, which represents the address of a global variable or function.
 </ul>
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="GlobalValue">The <tt>GlobalValue</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a
 href="/doxygen/GlobalValue_8h-source.html">llvm/GlobalValue.h</a>"</tt><br>
@@ -3607,15 +3651,14 @@ dereference the pointer with <tt>GetElementPtrInst</tt> first, then its elements
 can be accessed. This is explained in the <a href="LangRef.html#globalvars">LLVM
 Language Reference Manual</a>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="m_GlobalValue">Important Public Members of the <tt>GlobalValue</tt>
-  class</a>
-</div>
+<h4>
+  <a name="m_GlobalValue">
+    Important Public Members of the <tt>GlobalValue</tt> class
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>bool hasInternalLinkage() const</tt><br>
@@ -3631,12 +3674,14 @@ GlobalValue is currently embedded into.</p></li>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Function">The <tt>Function</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a
 href="/doxygen/Function_8h-source.html">llvm/Function.h</a>"</tt><br> doxygen
@@ -3683,15 +3728,15 @@ href="#Argument"><tt>Argument</tt></a>s in the function body.</p>
 <p>Note that <tt>Function</tt> is a <a href="#GlobalValue">GlobalValue</a>
 and therefore also a <a href="#Constant">Constant</a>. The value of the function
 is its address (after linking) which is guaranteed to be constant.</p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="m_Function">Important Public Members of the <tt>Function</tt>
-  class</a>
-</div>
+<h4>
+  <a name="m_Function">
+    Important Public Members of the <tt>Function</tt> class
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>Function(const </tt><tt><a href="#FunctionType">FunctionType</a>
@@ -3769,12 +3814,14 @@ iterator<br>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="GlobalVariable">The <tt>GlobalVariable</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a
 href="/doxygen/GlobalVariable_8h-source.html">llvm/GlobalVariable.h</a>"</tt>
@@ -3796,15 +3843,15 @@ variables may have an initial value (which must be a
 <a href="#Constant"><tt>Constant</tt></a>), and if they have an initializer, 
 they may be marked as "constant" themselves (indicating that their contents 
 never change at runtime).</p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="m_GlobalVariable">Important Public Members of the
-  <tt>GlobalVariable</tt> class</a>
-</div>
+<h4>
+  <a name="m_GlobalVariable">
+    Important Public Members of the <tt>GlobalVariable</tt> class
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li><tt>GlobalVariable(const </tt><tt><a href="#Type">Type</a> *Ty, bool
@@ -3842,13 +3889,14 @@ never change at runtime).</p>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="BasicBlock">The <tt>BasicBlock</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>#include "<a
 href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
@@ -3873,15 +3921,14 @@ href="#Value"><tt>Value</tt></a>s, because they are referenced by instructions
 like branches and can go in the switch tables. <tt>BasicBlock</tt>s have type
 <tt>label</tt>.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="m_BasicBlock">Important Public Members of the <tt>BasicBlock</tt>
-  class</a>
-</div>
+<h4>
+  <a name="m_BasicBlock">
+    Important Public Members of the <tt>BasicBlock</tt> class
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 <ul>
 
 <li><tt>BasicBlock(const std::string &amp;Name = "", </tt><tt><a
@@ -3933,13 +3980,14 @@ returned.</p></li>
 
 </div>
 
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="Argument">The <tt>Argument</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>This subclass of Value defines the interface for incoming formal
 arguments to a function. A Function maintains a list of its formal
@@ -3947,6 +3995,8 @@ arguments. An argument has a pointer to the parent Function.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -3957,8 +4007,8 @@ arguments. An argument has a pointer to the parent Function.</p>
 
   <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-17 03:19:22 +0100 (Thu, 17 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/Projects.html b/docs/Projects.html
index 76da086..3c6d4ad 100644
--- a/docs/Projects.html
+++ b/docs/Projects.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">Creating an LLVM Project</div>
+<h1>Creating an LLVM Project</h1>
 
 <ol>
 <li><a href="#overview">Overview</a></li>
@@ -30,10 +30,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="overview">Overview</a></div>
+<h2><a name="overview">Overview</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM build system is designed to facilitate the building of third party
 projects that use LLVM header files, libraries, and tools.  In order to use
@@ -73,12 +73,12 @@ provide enough information on how to write your own Makefiles.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="create">Create a Project from the Sample Project</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Follow these simple steps to start your project:</p>
 
@@ -145,12 +145,12 @@ project should build.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="source">Source Tree Layout</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>In order to use the LLVM build system, you will want to organize your
 source code so that it can benefit from the build system's features.
@@ -230,26 +230,24 @@ your <b>tools</b> directory.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="makefiles">Writing LLVM Style Makefiles</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM build system provides a convenient way to build libraries and
 executables.  Most of your project Makefiles will only need to define a few
 variables.  Below is a list of the variables one can set and what they can
 do:</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="reqVars">Required Variables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>LEVEL
@@ -263,11 +261,11 @@ do:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="varsBuildDir">Variables for Building Subdirectories</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>DIRS
@@ -294,11 +292,11 @@ do:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="varsBuildLib">Variables for Building Libraries</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>LIBRARYNAME
@@ -325,11 +323,11 @@ do:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="varsBuildProg">Variables for Building Programs</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>TOOLNAME
@@ -368,11 +366,11 @@ do:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="miscVars">Miscellaneous Variables</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <dl>
   <dt>ExtraSource
@@ -398,13 +396,15 @@ do:</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="objcode">Placement of Object Code</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The final location of built libraries and executables will depend upon
 whether you do a Debug, Release, or Profile build.</p>
@@ -427,12 +427,12 @@ whether you do a Debug, Release, or Profile build.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="help">Further Help</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>If you have any questions or need any help creating an LLVM project,
 the LLVM team would be more than happy to help.  You can always post your
@@ -451,9 +451,9 @@ Mailing List</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:criswell@uiuc.edu">John Criswell</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 8429837..71bf16e 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -3,13 +3,12 @@
 <html>
 <head>
   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta encoding="utf8">
   <link rel="stylesheet" href="llvm.css" type="text/css">
-  <title>LLVM 2.8 Release Notes</title>
+  <title>LLVM 2.9 Release Notes</title>
 </head>
 <body>
 
-<div class="doc_title">LLVM 2.8 Release Notes</div>
+<h1>LLVM 2.9 Release Notes</h1>
 
 <img align=right src="http://llvm.org/img/DragonSmall.png"
     width="136" height="136" alt="LLVM Dragon Logo">
@@ -17,35 +16,35 @@
 <ol>
   <li><a href="#intro">Introduction</a></li>
   <li><a href="#subproj">Sub-project Status Update</a></li>
-  <li><a href="#externalproj">External Projects Using LLVM 2.8</a></li>
-  <li><a href="#whatsnew">What's New in LLVM 2.8?</a></li>
+  <li><a href="#externalproj">External Projects Using LLVM 2.9</a></li>
+  <li><a href="#whatsnew">What's New in LLVM 2.9?</a></li>
   <li><a href="GettingStarted.html">Installation Instructions</a></li>
   <li><a href="#knownproblems">Known Problems</a></li>
   <li><a href="#additionalinfo">Additional Information</a></li>
 </ol>
 
 <div class="doc_author">
-  <p>Written by the <a href="http://llvm.org">LLVM Team</a></p>
+  <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
 </div>
 
 <!--
-<h1 style="color:red">These are in-progress notes for the upcoming LLVM 2.8
+<h1 style="color:red">These are in-progress notes for the upcoming LLVM 2.9
 release.<br>
 You may prefer the
-<a href="http://llvm.org/releases/2.7/docs/ReleaseNotes.html">LLVM 2.7
+<a href="http://llvm.org/releases/2.8/docs/ReleaseNotes.html">LLVM 2.8
 Release Notes</a>.</h1>
--->
+ -->
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="intro">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document contains the release notes for the LLVM Compiler
-Infrastructure, release 2.8.  Here we describe the status of LLVM, including
+Infrastructure, release 2.9.  Here we describe the status of LLVM, including
 major improvements from the previous release and significant known problems.
 All LLVM releases may be downloaded from the <a
 href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
@@ -62,51 +61,37 @@ current one.  To see the release notes for a specific release, please see the
 <a href="http://llvm.org/releases/">releases page</a>.</p>
 
 </div>
- 
-
-<!--
-Almost dead code.
-  include/llvm/Analysis/LiveValues.h => Dan
-  lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8.
-  GEPSplitterPass
--->
- 
    
-<!-- Features that need text if they're finished for 2.9:
+<!-- Features that need text if they're finished for 3.1:
+  ARM EHABI
   combiner-aa?
   strong phi elim
   loop dependence analysis
-  TBAA
   CorrelatedValuePropagation
+  lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.1.
  -->
  
- <!-- Announcement, lldb, libc++ -->
- 
-
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="subproj">Sub-project Status Update</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>
-The LLVM 2.8 distribution currently consists of code from the core LLVM
+The LLVM 2.9 distribution currently consists of code from the core LLVM
 repository (which roughly includes the LLVM optimizers, code generators
 and supporting tools), the Clang repository and the llvm-gcc repository.  In
 addition to this code, the LLVM Project includes other sub-projects that are in
 development.  Here we include updates on these subprojects.
 </p>
 
-</div>
-
-
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="clang">Clang: C/C++/Objective-C Frontend Toolkit</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><a href="http://clang.llvm.org/">Clang</a> is an LLVM front end for the C,
 C++, and Objective-C languages. Clang aims to provide a better user experience
@@ -115,112 +100,61 @@ standards, fast compilation, and low memory use. Like LLVM, Clang provides a
 modular, library-based architecture that makes it suitable for creating or
 integrating with other development tools. Clang is considered a
 production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86
-(32- and 64-bit), and for darwin-arm targets.</p>
-
-<p>In the LLVM 2.8 time-frame, the Clang team has made many improvements:</p>
-
-  <ul>
-    <li>Clang C++ is now feature-complete with respect to the ISO C++ 1998 and 2003 standards.</li>
-    <li>Added support for Objective-C++.</li>
-    <li>Clang now uses LLVM-MC to directly generate object code and to parse inline assembly (on Darwin).</li>
-    <li>Introduced many new warnings, including <code>-Wmissing-field-initializers</code>, <code>-Wshadow</code>, <code>-Wno-protocol</code>, <code>-Wtautological-compare</code>, <code>-Wstrict-selector-match</code>, <code>-Wcast-align</code>, <code>-Wunused</code> improvements, and greatly improved format-string checking.</li>
-    <li>Introduced the "libclang" library, a C interface to Clang intended to support IDE clients.</li>
-    <li>Added support for <code>#pragma GCC visibility</code>, <code>#pragma align</code>, and others.</li>
-    <li>Added support for SSE, AVX, ARM NEON, and AltiVec.</li>
-    <li>Improved support for many Microsoft extensions.</li>
-    <li>Implemented support for blocks in C++.</li>
-    <li>Implemented precompiled headers for C++.</li>
-    <li>Improved abstract syntax trees to retain more accurate source information.</li>
-    <li>Added driver support for handling LLVM IR and bitcode files directly.</li>
-    <li>Major improvements to compiler correctness for exception handling.</li>
-    <li>Improved generated code quality in some areas:
-      <ul>
-        <li>Good code generation for X86-32 and X86-64 ABI handling.</li>
-        <li>Improved code generation for bit-fields, although important work remains.</li>
-      </ul>
-    </li>
-  </ul>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="clangsa">Clang Static Analyzer</a>
-</div>
-
-<div class="doc_text">
-
-<p>The <a href="http://clang-analyzer.llvm.org/">Clang Static Analyzer</a>
-   project is an effort to use static source code analysis techniques to
-   automatically find bugs in C and Objective-C programs (and hopefully <a
-   href="http://clang-analyzer.llvm.org/dev_cxx.html">C++ in the
-   future</a>!).  The tool is very good at finding bugs that occur on specific
-   paths through code, such as on error conditions.</p>
-
-<p>The LLVM 2.8 release fixes a number of bugs and slightly improves precision
-   over 2.7, but there are no major new features in the release. 
+(32- and 64-bit), and for darwin/arm targets.</p>
+
+<p>In the LLVM 2.9 time-frame, the Clang team has made many improvements in C,
+C++ and Objective-C support.  C++ support is now generally rock solid, has
+been exercised on a broad variety of code, and has several new <a 
+href="http://clang.llvm.org/cxx_status.html#cxx0x">C++'0x features</a>
+implemented (such as rvalue references and variadic templates).  LLVM 2.9 has
+also brought in a large range of bug fixes and minor features (e.g. __label__
+support), and is much more compatible with the Linux Kernel.</p>  
+  
+<p>If Clang rejects your code but another compiler accepts it, please take a
+look at the <a href="http://clang.llvm.org/compatibility.html">language
+compatibility</a> guide to make sure this is not intentional or a known issue.
 </p>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="dragonegg">DragonEgg: llvm-gcc ported to gcc-4.5</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://dragonegg.llvm.org/">DragonEgg</a> is a port of llvm-gcc to
-gcc-4.5.  Unlike llvm-gcc, dragonegg in theory does not require any gcc-4.5
-modifications whatsoever (currently one small patch is needed) thanks to the
-new <a href="http://gcc.gnu.org/wiki/plugins">gcc plugin architecture</a>.
-DragonEgg is a gcc plugin that makes gcc-4.5 use the LLVM optimizers and code
-generators instead of gcc's, just like with llvm-gcc.
-</p>
+<h3>
+<a name="dragonegg">DragonEgg: GCC front-ends, LLVM back-end</a>
+</h3>
 
+<div>
 <p>
-DragonEgg is still a work in progress, but it is able to compile a lot of code,
-for example all of gcc, LLVM and clang.  Currently Ada, C, C++ and Fortran work
-well, while all other languages either don't work at all or only work poorly.
-For the moment only the x86-32 and x86-64 targets are supported, and only on
-linux and darwin (darwin may need additional gcc patches).
+<a href="http://dragonegg.llvm.org/">DragonEgg</a> is a
+<a href="http://gcc.gnu.org/wiki/plugins">gcc plugin</a> that replaces GCC's
+optimizers and code generators with LLVM's.
+Currently it requires a patched version of gcc-4.5.
+The plugin can target the x86-32 and x86-64 processor families and has been
+used successfully on the Darwin, FreeBSD and Linux platforms.
+The Ada, C, C++ and Fortran languages work well.
+The plugin is capable of compiling plenty of Obj-C, Obj-C++ and Java but it is
+not known whether the compiled code actually works or not!
 </p>
 
 <p>
-The 2.8 release has the following notable changes:
+The 2.9 release has the following notable changes:
 <ul>
-<li>The plugin loads faster due to exporting fewer symbols.</li>
-<li>Additional vector operations such as addps256 are now supported.</li>
-<li>Ada global variables with no initial value are no longer zero initialized,
-resulting in better optimization.</li>
-<li>The '-fplugin-arg-dragonegg-enable-gcc-optzns' flag now runs all gcc
-optimizers, rather than just a handful.</li>
-<li>Fortran programs using common variables now link correctly.</li>
-<li>GNU OMP constructs no longer crash the compiler.</li>
+<li>The plugin is much more stable when compiling Fortran.</li>
+<li>Inline assembly where an asm output is tied to an input of a different size
+is now supported in many more cases.</li>
+<li>Basic support for the __float128 type was added.  It is now possible to
+generate LLVM IR from programs using __float128 but code generation does not
+work yet.</li>
+<li>Compiling Java programs no longer systematically crashes the plugin.</li>
 </ul>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="vmkit">VMKit: JVM/CLI Virtual Machine Implementation</a>
-</div>
-
-<div class="doc_text">
-<p>
-The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation of
-a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and
-just-in-time compilation.  As of LLVM 2.8, VMKit now supports copying garbage
-collectors, and can be configured to use MMTk's copy mark-sweep garbage
-collector.  In LLVM 2.8, the VMKit .NET VM is no longer being maintained.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="compiler-rt">compiler-rt: Compiler Runtime Library</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 The new LLVM <a href="http://compiler-rt.llvm.org/">compiler-rt project</a>
 is a simple library that provides an implementation of the low-level
@@ -231,21 +165,22 @@ function. The compiler-rt library provides highly optimized implementations of
 this and other low-level routines (some are 3x faster than the equivalent
 libgcc routines).</p>
 
-<p>
-All of the code in the compiler-rt project is available under the standard LLVM
-License, a "BSD-style" license.  New in LLVM 2.8, compiler_rt now supports 
-soft floating point (for targets that don't have a real floating point unit),
-and includes an extensive testsuite for the "blocks" language feature and the
-blocks runtime included in compiler_rt.</p>
+<p>In the LLVM 2.9 timeframe, compiler_rt has had several minor changes for
+  better ARM support, and a fairly major license change.  All of the code in the
+  compiler-rt project is now <a href="DeveloperPolicy.html#license">dual
+  licensed</a> under MIT and UIUC license, which allows you to use compiler-rt
+  in applications without the binary copyright reproduction clause.  If you
+  prefer the LLVM/UIUC license, you are free to continue using it under that
+  license as well.</p>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="lldb">LLDB: Low Level Debugger</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <a href="http://lldb.llvm.org/">LLDB</a> is a brand new member of the LLVM
 umbrella of projects. LLDB is a next generation, high-performance debugger. It
@@ -254,20 +189,20 @@ libraries in the larger LLVM Project, such as the Clang expression parser, the
 LLVM disassembler and the LLVM JIT.</p>
 
 <p>
-LLDB is in early development and not included as part of the LLVM 2.8 release,
-but is mature enough to support basic debugging scenarios on Mac OS X in C,
-Objective-C and C++.  We'd really like help extending and expanding LLDB to 
-support new platforms, new languages, new architectures, and new features.
-</p>
+LLDB is has advanced by leaps and bounds in the 2.9 timeframe.  It is
+dramatically more stable and useful, and includes both a new <a 
+href="http://lldb.llvm.org/tutorial.html">tutorial</a> and a <a
+href="http://lldb.llvm.org/lldb-gdb.html">side-by-side comparison with 
+GDB</a>.</p>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="libc++">libc++: C++ Standard Library</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <a href="http://libcxx.llvm.org/">libc++</a> is another new member of the LLVM
 family.  It is an implementation of the C++ standard library, written from the
@@ -275,21 +210,56 @@ ground up to specifically target the forthcoming C++'0X standard and focus on
 delivering great performance.</p>
 
 <p>
-As of the LLVM 2.8 release, libc++ is virtually feature complete, but would
-benefit from more testing and better integration with Clang++.  It is also
-looking forward to the C++ committee finalizing the C++'0x standard.
+In the LLVM 2.9 timeframe, libc++ has had numerous bugs fixed, and is now being
+co-developed with Clang's C++'0x mode.</p>
+  
+<p>
+Like compiler_rt, libc++ is now <a href="DeveloperPolicy.html#license">dual
+  licensed</a> under the MIT and UIUC license, allowing it to be used more
+  permissively.
 </p>
 
 </div>
 
 
+<!--=========================================================================-->
+<h3>
+<a name="LLBrowse">LLBrowse: IR Browser</a>
+</h3>
+
+<div>
+<p>
+<a href="http://llvm.org/svn/llvm-project/llbrowse/trunk/doc/LLBrowse.html">
+  LLBrowse</a> is an interactive viewer for LLVM modules. It can load any LLVM
+  module and displays its contents as an expandable tree view, facilitating an
+  easy way to inspect types, functions, global variables, or metadata nodes. It
+  is fully cross-platform, being based on the popular wxWidgets GUI toolkit.
+</p>
+</div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="klee">KLEE: A Symbolic Execution Virtual Machine</a>
+<h3>
+<a name="vmkit">VMKit</a>
+</h3>
+
+<div>
+<p>The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation
+  of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and
+  just-in-time compilation. As of LLVM 2.9, VMKit now supports generational
+  garbage collectors. The garbage collectors are provided by the MMTk framework,
+  and VMKit can be configured to use one of the numerous implemented collectors
+  of MMTk.
+</p>
 </div>
+  
+  
+<!--=========================================================================-->
+<!--
+<h3>
+<a name="klee">KLEE: A Symbolic Execution Virtual Machine</a>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <a href="http://klee.llvm.org/">KLEE</a> is a symbolic execution framework for
 programs in LLVM bitcode form. KLEE tries to symbolically evaluate "all" paths
@@ -298,437 +268,347 @@ states. This allows it to construct testcases that lead to faults and can even
 be used to verify some algorithms.
 </p>
 
-<p>Although KLEE does not have any major new features as of 2.8, we have made
-various minor improvements, particular to ease development:</p>
-<ul>
-  <li>Added support for LLVM 2.8. KLEE currently maintains compatibility with
-    LLVM 2.6, 2.7, and 2.8.</li>
-  <li>Added a buildbot for 2.6, 2.7, and trunk. A 2.8 buildbot will be coming
-    soon following release.</li>
-  <li>Fixed many C++ code issues to allow building with Clang++. Mostly
-    complete, except for the version of MiniSAT which is inside the KLEE STP
-    version.</li>
-  <li>Improved support for building with separate source and build
-    directories.</li>
-  <li>Added support for "long double" on x86.</li>
-  <li>Initial work on KLEE support for using 'lit' test runner instead of
-    DejaGNU.</li>
-  <li>Added <tt>configure</tt> support for using an external version of
-    STP.</li>
-</ul>
+<p>UPDATE!</p>
+</div>-->
 
 </div>
 
-
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="externalproj">External Open Source Projects Using LLVM 2.8</a>
-</div>
+<h2>
+  <a name="externalproj">External Open Source Projects Using LLVM 2.9</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>An exciting aspect of LLVM is that it is used as an enabling technology for
    a lot of other language and tools projects.  This section lists some of the
-   projects that have already been updated to work with LLVM 2.8.</p>
-</div>
+   projects that have already been updated to work with LLVM 2.9.</p>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="tce">TTA-based Codesign Environment (TCE)</a>
-</div>
+<h3>Crack Programming Language</h3>
 
-<div class="doc_text">
+<div>
 <p>
-<a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
-application-specific processors (ASP) based on the Transport triggered
-architecture (TTA). The toolset provides a complete co-design flow from C/C++
-programs down to synthesizable VHDL and parallel program binaries. Processor
-customization points include the register files, function units, supported
-operations, and the interconnection network.</p>
-
-<p>TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target
-independent optimizations and also for parts of code generation. It generates
-new LLVM-based code generators "on the fly" for the designed TTA processors and
-loads them in to the compiler backend as runtime libraries to avoid per-target
-recompilation of larger parts of the compiler chain.</p>
-
+<a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide the
+ease of development of a scripting language with the performance of a compiled
+language. The language derives concepts from C++, Java and Python, incorporating
+object-oriented programming, operator overloading and strong typing.</p>
 </div>
-
+  
+  
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="Horizon">Horizon Bytecode Compiler</a>
+<h3>TTA-based Codesign Environment (TCE)</h3>
+  
+<div>
+<p>TCE is a toolset for designing application-specific processors (ASP) based on
+the Transport triggered architecture (TTA). The toolset provides a complete
+co-design flow from C/C++ programs down to synthesizable VHDL and parallel
+program binaries. Processor customization points include the register files,
+function units, supported operations, and the interconnection network.</p>
+  
+<p>TCE uses Clang and LLVM for C/C++ language support, target independent
+optimizations and also for parts of code generation. It generates new LLVM-based
+code generators "on the fly" for the designed TTA processors and loads them in
+to the compiler backend as runtime libraries to avoid per-target recompilation
+of larger parts of the compiler chain.</p>
 </div>
 
-<div class="doc_text">
-<p>
-<a href="http://www.quokforge.org/projects/horizon">Horizon</a> is a bytecode
-language and compiler written on top of LLVM, intended for producing
-single-address-space managed code operating systems that
-run faster than the equivalent multiple-address-space C systems.
-More in-depth blurb is available on the <a 
-href="http://www.quokforge.org/projects/horizon/wiki/Wiki">wiki</a>.</p>
-
-</div>
 
+  
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="clamav">Clam AntiVirus</a>
+<h3>PinaVM</h3>
+  
+<div>
+<p><a href="http://gitorious.org/pinavm/pages/Home">PinaVM</a> is an open
+source, <a href="http://www.systemc.org/">SystemC</a> front-end. Unlike many
+other front-ends, PinaVM actually executes the elaboration of the
+program analyzed using LLVM's JIT infrastructure. It later enriches the
+bitcode with SystemC-specific information.</p>
 </div>
 
-<div class="doc_text">
-<p>
-<a href="http://www.clamav.net">Clam AntiVirus</a> is an open source (GPL)
-anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail
-gateways.  Since version 0.96 it has <a
-href="http://vrt-sourcefire.blogspot.com/2010/09/introduction-to-clamavs-low-level.html">bytecode
-signatures</a> that allow writing detections for complex malware. It
-uses LLVM's JIT to speed up the execution of bytecode on
-X86, X86-64, PPC32/64, falling back to its own interpreter otherwise.
-The git version was updated to work with LLVM 2.8.
-</p>
-
-<p>The <a
-href="http://git.clamav.net/gitweb?p=clamav-bytecode-compiler.git;a=blob_plain;f=docs/user/clambc-user.pdf">
-ClamAV bytecode compiler</a> uses Clang and LLVM to compile a C-like
-language, insert runtime checks, and generate ClamAV bytecode.</p>
-
+<!--=========================================================================-->
+<h3>Pure</h3>
+  
+<div>
+<p><a href="http://pure-lang.googlecode.com/">Pure</a> is an
+  algebraic/functional
+  programming language based on term rewriting. Programs are collections
+  of equations which are used to evaluate expressions in a symbolic
+  fashion. The interpreter uses LLVM as a backend to JIT-compile Pure
+  programs to fast native code. Pure offers dynamic typing, eager and lazy
+  evaluation, lexical closures, a hygienic macro system (also based on
+  term rewriting), built-in list and matrix support (including list and
+  matrix comprehensions) and an easy-to-use interface to C and other
+  programming languages (including the ability to load LLVM bitcode
+  modules, and inline C, C++, Fortran and Faust code in Pure programs if
+  the corresponding LLVM-enabled compilers are installed).</p>
+  
+<p>Pure version 0.47 has been tested and is known to work with LLVM 2.9
+  (and continues to work with older LLVM releases &gt;= 2.5).</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="pure">Pure</a>
-</div>
+<h3 id="icedtea">IcedTea Java Virtual Machine Implementation</h3>
 
-<div class="doc_text">
+<div>
 <p>
-<a href="http://pure-lang.googlecode.com/">Pure</a>
-is an algebraic/functional
-programming language based on term rewriting. Programs are collections
-of equations which are used to evaluate expressions in a symbolic
-fashion. Pure offers dynamic typing, eager and lazy evaluation, lexical
-closures, a hygienic macro system (also based on term rewriting),
-built-in list and matrix support (including list and matrix
-comprehensions) and an easy-to-use C interface. The interpreter uses
-LLVM as a backend to JIT-compile Pure programs to fast native code.</p>
-
-<p>Pure versions 0.44 and later have been tested and are known to work with
-LLVM 2.8 (and continue to work with older LLVM releases >= 2.5).</p>
+<a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
+harness to build OpenJDK using only free software build tools and to provide
+replacements for the not-yet free parts of OpenJDK.  One of the extensions that
+IcedTea provides is a new JIT compiler named <a
+href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
+to provide native code generation without introducing processor-dependent
+code.
+</p>
 
+<p> OpenJDK 7 b112, IcedTea6 1.9 and IcedTea7 1.13 and later have been tested
+and are known to work with LLVM 2.9 (and continue to work with older LLVM
+releases &gt;= 2.6 as well).</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="GHC">Glasgow Haskell Compiler (GHC)</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://www.haskell.org/ghc/">GHC</a> is an open source,
-state-of-the-art programming suite for
-Haskell, a standard lazy functional programming language. It includes
-an optimizing static compiler generating good code for a variety of
+<h3>Glasgow Haskell Compiler (GHC)</h3>
+  
+<div>
+<p>GHC is an open source, state-of-the-art programming suite for Haskell,
+a standard lazy functional programming language. It includes an
+optimizing static compiler generating good code for a variety of
 platforms, together with an interactive system for convenient, quick
 development.</p>
 
 <p>In addition to the existing C and native code generators, GHC 7.0 now
-supports an <a
-href="http://hackage.haskell.org/trac/ghc/wiki/Commentary/Compiler/Backends/LLVM">LLVM
-code generator</a>. GHC supports LLVM 2.7 and later.</p>
-
+supports an LLVM code generator. GHC supports LLVM 2.7 and later.</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="Clay">Clay Programming Language</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://tachyon.in/clay/">Clay</a> is a new systems programming
-language that is specifically designed for generic programming. It makes
-generic programming very concise thanks to whole program type propagation. It
-uses LLVM as its backend.</p>
-
+<h3>Polly - Polyhedral optimizations for LLVM</h3>
+  
+<div>
+<p>Polly is a project that aims to provide advanced memory access optimizations
+to better take advantage of SIMD units, cache hierarchies, multiple cores or
+even vector accelerators for LLVM. Built around an abstract mathematical
+description based on Z-polyhedra, it provides the infrastructure to develop
+advanced optimizations in LLVM and to connect complex external optimizers. In
+its first year of existence Polly already provides an exact value-based
+dependency analysis as well as basic SIMD and OpenMP code generation support.
+Furthermore, Polly can use PoCC(Pluto) an advanced optimizer for data-locality
+and parallelism.</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="llvm-py">llvm-py Python Bindings for LLVM</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://www.mdevan.org/llvm-py/">llvm-py</a> has been updated to work
-with LLVM 2.8.  llvm-py provides Python bindings for LLVM, allowing you to write a
-compiler backend or a VM in Python.</p>
+<h3>Rubinius</h3>
 
+<div>
+  <p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is an environment
+  for running Ruby code which strives to write as much of the implementation in
+  Ruby as possible. Combined with a bytecode interpreting VM, it uses LLVM to
+  optimize and compile ruby code down to machine code. Techniques such as type
+  feedback, method inlining, and deoptimization are all used to remove dynamism
+  from ruby execution and increase performance.</p>
 </div>
 
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="FAUST">FAUST Real-Time Audio Signal Processing Language</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 <a href="http://faust.grame.fr">FAUST</a> is a compiled language for real-time
 audio signal processing. The name FAUST stands for Functional AUdio STream. Its
 programming model combines two approaches: functional programming and block
 diagram composition. In addition with the C, C++, JAVA output formats, the
-Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7 and
-2.8.</p>
-
-</div>
+Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7-2.9.</p>
 
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="jade">Jade Just-in-time Adaptive Decoder Engine</a>
 </div>
-
-<div class="doc_text">
-<p><a 
-href="http://sourceforge.net/apps/trac/orcc/wiki/JadeDocumentation">Jade</a>
-(Just-in-time Adaptive Decoder Engine) is a generic video decoder engine using
-LLVM for just-in-time compilation of video decoder configurations. Those
-configurations are designed by MPEG Reconfigurable Video Coding (RVC) committee.
-MPEG RVC standard is built on a stream-based dataflow representation of
-decoders. It is composed of a standard library of coding tools written in
-RVC-CAL language and a dataflow configuration &#8212; block diagram &#8212;
-of a decoder.</p>
-
-<p>Jade project is hosted as part of the <a href="http://orcc.sf.net">Open 
-RVC-CAL Compiler</a> and requires it to translate the RVC-CAL standard library
-of video coding tools into an LLVM assembly code.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="neko_llvm_jit">LLVM JIT for Neko VM</a>
-</div>
-
-<div class="doc_text">
-<p><a href="http://github.com/vava/neko_llvm_jit">Neko LLVM JIT</a>
-replaces the standard Neko JIT with an LLVM-based implementation.  While not
-fully complete, it is already providing a 1.5x speedup on 64-bit systems.
-Neko LLVM JIT requires LLVM 2.8 or later.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="crack">Crack Scripting Language</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide
-the ease of development of a scripting language with the performance of a
-compiled language. The language derives concepts from C++, Java and Python,
-incorporating object-oriented programming, operator overloading and strong
-typing.  Crack 0.2 works with LLVM 2.7, and the forthcoming Crack 0.2.1 release
-builds on LLVM 2.8.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="DresdenTM">Dresden TM Compiler (DTMC)</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://tm.inf.tu-dresden.de">DTMC</a> provides support for 
-Transactional Memory, which is an easy-to-use and efficient way to synchronize 
-accesses to shared memory. Transactions can contain normal C/C++ code (e.g., 
-<code>__transaction { list.remove(x); x.refCount--; }</code>) and will be executed 
-virtually atomically and isolated from other transactions.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="Kai">Kai Programming Language</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://www.oriontransfer.co.nz/research/kai">Kai</a> (Japanese 会 for
-meeting/gathering) is an experimental interpreter that provides a highly
-extensible runtime environment and explicit control over the compilation
-process. Programs are defined using nested symbolic expressions, which are all
-parsed into first-class values with minimal intrinsic semantics. Kai can
-generate optimised code at run-time (using LLVM) in order to exploit the nature
-of the underlying hardware and to integrate with external software libraries.
-It is a unique exploration into world of dynamic code compilation, and the
-interaction between high level and low level semantics.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="OSL">OSL: Open Shading Language</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://code.google.com/p/openshadinglanguage/">OSL</a> is a shading
-language designed for use in physically based renderers and in particular
-production rendering. By using LLVM instead of the interpreter, it was able to
-meet its performance goals (&gt;= C-code) while retaining the benefits of
-runtime specialization and a portable high-level language.
-</p>
-
+  
 </div>
 
-
-
 <!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="whatsnew">What's New in LLVM 2.8?</a>
-</div>
+<h2>
+  <a name="whatsnew">What's New in LLVM 2.9?</a>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This release includes a huge number of bug fixes, performance tweaks and
 minor improvements.  Some of the major improvements and new features are listed
 in this section.
 </p>
 
-</div>
-
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="majorfeatures">Major New Features</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
-<p>LLVM 2.8 includes several major new capabilities:</p>
+<p>LLVM 2.9 includes several major new capabilities:</p>
 
 <ul>
-<li>As mentioned above, <a href="#libc++">libc++</a> and <a 
-   href="#lldb">LLDB</a> are major new additions to the LLVM collective.</li>
-<li>LLVM 2.8 now has pretty decent support for debugging optimized code.  You
-    should be able to reliably get debug info for function arguments, assuming
-    that the value is actually available where you have stopped.</li>
-<li>A new 'llvm-diff' tool is available that does a semantic diff of .ll
-    files.</li>
-<li>The <a href="#mc">MC subproject</a> has made major progress in this release.
-    Direct .o file writing support for darwin/x86[-64] is now reliable and
-    support for other targets and object file formats are in progress.</li>
-</ul>
+  
+<li>Type Based Alias Analysis (TBAA) is now implemented and turned on by default
+  in Clang.  This allows substantially better load/store optimization in some
+  cases.  TBAA can be disabled by passing -fno-strict-aliasing.
+</li>
+
+<li>This release has seen a continued focus on quality of debug information. 
+  LLVM now generates much higher fidelity debug information, particularly when
+  debugging optimized code.</li>
+
+<li>Inline assembly now supports multiple alternative constraints.</li>  
 
+<li>A new backend for the NVIDIA PTX virtual ISA (used to target its GPUs) is
+  under rapid development.  It is not generally useful in 2.9, but is making
+  rapid progress.</li>
+  
+</ul>
+  
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="coreimprovements">LLVM IR and Core Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>LLVM IR has several new features for better support of new targets and that
 expose new optimization opportunities:</p>
 
 <ul>
-<li>The <a href="LangRef.html#int_libc">memcpy, memmove, and memset</a>
-  intrinsics now take address space qualified pointers and a bit to indicate
-  whether the transfer is "<a href="LangRef.html#volatile">volatile</a>" or not.
-</li>
-<li>Per-instruction debug info metadata is much faster and uses less memory by
-    using the new DebugLoc class.</li>
-<li>LLVM IR now has a more formalized concept of "<a
-    href="LangRef.html#trapvalues">trap values</a>", which allow the optimizer
-    to optimize more aggressively in the presence of undefined behavior, while
-    still producing predictable results.</li>
-<li>LLVM IR now supports two new <a href="LangRef.html#linkage">linkage
-    types</a> (linker_private_weak and linker_private_weak_def_auto) which map
-    onto some obscure MachO concepts.</li>
+<li>The <a href="LangRef.html#bitwiseops">udiv, ashr, lshr, and shl</a>
+  instructions now have support exact and nuw/nsw bits to indicate that they
+  don't overflow or shift out bits.  This is useful for optimization of <a
+    href="http://llvm.org/PR8862">pointer differences</a> and other cases.</li>
+  
+<li>LLVM IR now supports the <a href="LangRef.html#globalvars">unnamed_addr</a>
+  attribute to indicate that constant global variables with identical
+  initializers can be merged.  This fixed <a href="http://llvm.org/PR8927">an
+  issue</a> where LLVM would incorrectly merge two globals which were supposed
+  to have distinct addresses.</li>
+  
+<li>The new <a href="LangRef.html#fnattrs">hotpatch attribute</a> has been added
+  to allow runtime patching of functions.</li> 
 </ul>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="optimizer">Optimizer Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>In addition to a large array of minor performance tweaks and bug fixes, this
 release includes a few major enhancements and additions to the optimizers:</p>
 
 <ul>
-<li>As mentioned above, the optimizer now has support for updating debug
-   information as it goes.  A key aspect of this is the new <a
-   href="SourceLevelDebugging.html#format_common_value">llvm.dbg.value</a>
-   intrinsic.  This intrinsic represents debug info for variables that are
-   promoted to SSA values (typically by mem2reg or the -scalarrepl passes).</li>
-
-<li>The JumpThreading pass is now much more aggressive about implied value
-    relations, allowing it to thread conditions like "a == 4" when a is known to
-    be 13 in one of the predecessors of a block.  It does this in conjunction
-    with the new LazyValueInfo analysis pass.</li>
-<li>The new RegionInfo analysis pass identifies single-entry single-exit regions
-    in the CFG.  You can play with it with the "opt -regions -analyze" or
-    "opt -view-regions" commands.</li>
-<li>The loop optimizer has significantly improved strength reduction and analysis
-  capabilities.  Notably it is able to build on the trap value and signed
-  integer overflow information to optimize &lt;= and &gt;= loops.</li>
-<li>The CallGraphSCCPassManager now has some basic support for iterating within
-    an SCC when a optimizer devirtualizes a function call.  This allows inlining
-    through indirect call sites that are devirtualized by store-load forwarding
-    and other optimizations.</li>
-<li>The new <A href="Passes.html#loweratomic">-loweratomic</a> pass is available
-    to lower atomic instructions into their non-atomic form.  This can be useful
-    to optimize generic code that expects to run in a single-threaded
-    environment.</li>
-</ul>
+<li>Link Time Optimization (LTO) has been improved to use MC for parsing inline
+  assembly and now can build large programs like Firefox 4 on both Mac OS X and
+  Linux.</li>
+  
+<li>The new -loop-idiom pass recognizes memset/memcpy loops (and memset_pattern
+  on darwin), turning them into library calls, which are typically better
+  optimized than inline code.  If you are building a libc and notice that your
+  memcpy and memset functions are compiled into infinite recursion, please build
+  with -ffreestanding or -fno-builtin to disable this pass.</li>
+  
+<li>A new -early-cse pass does a fast pass over functions to fold constants,
+  simplify expressions, perform simple dead store elimination, and perform
+  common subexpression elimination.  It does a good job at catching some of the
+  trivial redundancies that exist in unoptimized code, making later passes more
+  effective.</li>
+
+<li>A new -loop-instsimplify pass is used to clean up loop bodies in the loop
+  optimizer.</li>
+  
+<li>The new TargetLibraryInfo interface allows mid-level optimizations to know
+  whether the current target's runtime library has certain functions.  For
+  example, the optimizer can now transform integer-only printf calls to call
+  iprintf, allowing reduced code size for embedded C libraries (e.g. newlib).
+</li>
+    
+<li>LLVM has a new <a href="WritingAnLLVMPass.html#RegionPass">RegionPass</a>
+  infrastructure for region-based optimizations.</li>
+
+<li>Several optimizer passes have been substantially sped up:
+  GVN is much faster on functions with deep dominator trees and lots of basic
+  blocks.  The dominator tree and dominance frontier passes are much faster to
+  compute, and preserved by more passes (so they are computed less often).  The
+  -scalar-repl pass is also much faster and doesn't use DominanceFrontier.
+</li>
 
-<!--
-<p>In addition to these features that are done in 2.8, there is preliminary
-   support in the release for Type Based Alias Analysis 
-  Preliminary work on TBAA but not usable in 2.8.
-  New CorrelatedValuePropagation pass, not on by default in 2.8 yet.
--->
+<li>The Dead Store Elimination pass is more aggressive optimizing stores of
+  different types: e.g. a large store following a small one to the same address.
+  The MemCpyOptimizer pass handles several new forms of memcpy elimination.</li>
+  
+<li>LLVM now optimizes various idioms for overflow detection into check of the
+  flag register on various CPUs.  For example, we now compile:
+  
+  <pre>
+   unsigned long t = a+b;
+   if (t &lt; a) ...
+  </pre>
+  into:
+  <pre>
+   addq %rdi, %rbx
+   jno  LBB0_2
+  </pre>
+</li>
+  
+</ul>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="mc">MC Level Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>
 The LLVM Machine Code (aka MC) subsystem was created to solve a number
 of problems in the realm of assembly, disassembly, object file format handling,
 and a number of other related areas that CPU instruction-set level tools work
 in.</p>
 
-<p>The MC subproject has made great leaps in LLVM 2.8.  For example, support for
-   directly writing .o files from LLC (and clang) now works reliably for
-   darwin/x86[-64] (including inline assembly support) and the integrated
-   assembler is turned on by default in Clang for these targets.  This provides
-   improved compile times among other things.</p>
-
 <ul>
-<li>The entire compiler has converted over to using the MCStreamer assembler API
-    instead of writing out a .s file textually.</li>
-<li>The "assembler parser" is far more mature than in 2.7, supporting a full
-    complement of directives, now supports assembler macros, etc.</li>
-<li>The "assembler backend" has been completed, including support for relaxation
-    relocation processing and all the other things that an assembler does.</li>
-<li>The MachO file format support is now fully functional and works.</li>
-<li>The MC disassembler now fully supports ARM and Thumb.  ARM assembler support
-    is still in early development though.</li>
-<li>The X86 MC assembler now supports the X86 AES and AVX instruction set.</li>
-<li>Work on ELF and COFF object files and ARM target support is well underway,
-    but isn't useful yet in LLVM 2.8.  Please contact the llvmdev mailing list
-    if you're interested in this.</li>
+<li>ELF MC support has matured enough for the integrated assembler to be turned
+  on by default in Clang on X86-32 and X86-64 ELF systems.</li>
+  
+<li>MC supports and CodeGen uses the <tt>.file</tt> and <tt>.loc</tt> directives
+  for producing line number debug info. This produces more compact line
+  tables and easier to read .s files.</li>
+  
+<li>MC supports the <tt>.cfi_*</tt> directives for producing DWARF
+  frame information, but it is still not used by CodeGen by default.</li>
+
+  
+<li>The MC assembler now generates much better diagnostics for common errors,
+  is much faster at matching instructions, is much more bug-compatible with
+  the GAS assembler, and is now generally useful for a broad range of X86
+  assembly.</li>
+  
+<li>We now have some basic <a href="CodeGenerator.html#mc">internals
+  documentation</a> for MC.</li>
+  
+<li>.td files can now specify assembler aliases directly with the <a 
+   href="CodeGenerator.html#na_instparsing">MnemonicAlias and InstAlias</a>
+   tblgen classes.</li>
+  
+<li>LLVM now has an experimental format-independent object file manipulation
+  library (lib/Object).  It supports both PE/COFF and ELF.  The llvm-nm tool has
+  been extended to work with native object files, and the new llvm-objdump tool
+  supports disassembly of object files (but no relocations are displayed yet).
+</li>
+  
+<li>Win32 PE-COFF support in the MC assembler has made a lot of progress in the
+  2.9 timeframe, but is still not generally useful.</li>
+
 </ul>
 
 <p>For more information, please see the <a
@@ -736,375 +616,222 @@ href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro to the
 LLVM MC Project Blog Post</a>.
 </p>
 
-</div>	
-
+</div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="codegen">Target Independent Code Generator Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>We have put a significant amount of work into the code generator
 infrastructure, which allows us to implement more aggressive algorithms and make
 it run faster:</p>
 
 <ul>
-<li>The clang/gcc -momit-leaf-frame-pointer argument is now supported.</li>
-<li>The clang/gcc -ffunction-sections and -fdata-sections arguments are now
-    supported on ELF targets (like GCC).</li>
-<li>The MachineCSE pass is now tuned and on by default.  It eliminates common
-    subexpressions that are exposed when lowering to machine instructions.</li>
-<li>The "local" register allocator was replaced by a new "fast" register
-    allocator.  This new allocator (which is often used at -O0) is substantially
-    faster and produces better code than the old local register allocator.</li>
-<li>A new LLC "-regalloc=default" option is available, which automatically
-    chooses a register allocator based on the -O optimization level.</li>
-<li>The common code generator code was modified to promote illegal argument and
-    return value vectors to wider ones when possible instead of scalarizing
-    them.  For example, &lt;3 x float&gt; will now pass in one SSE register
-    instead of 3 on X86.  This generates substantially better code since the
-    rest of the code generator was already expecting this.</li>
-<li>The code generator uses a new "COPY" machine instruction.  This speeds up
-    the code generator and eliminates the need for targets to implement the 
-    isMoveInstr hook.  Also, the copyRegToReg hook was renamed to copyPhysReg
-    and simplified.</li>
-<li>The code generator now has a "LocalStackSlotPass", which optimizes stack
-    slot access for targets (like ARM) that have limited stack displacement
-    addressing.</li>
-<li>A new "PeepholeOptimizer" is available, which eliminates sign and zero
-    extends, and optimizes away compare instructions when the condition result
-    is available from a previous instruction.</li>
-<li>Atomic operations now get legalized into simpler atomic operations if not
-    natively supported, easing the implementation burden on targets.</li>
-<li>We have added two new bottom-up pre-allocation register pressure aware schedulers:
-<ol>
-<li>The hybrid scheduler schedules aggressively to minimize schedule length when registers are available and avoid overscheduling in high pressure situations.</li>
-<li>The instruction-level-parallelism scheduler schedules for maximum ILP when registers are available and avoid overscheduling in high pressure situations.</li>
-</ol></li>
-<li>The tblgen type inference algorithm was rewritten to be more consistent and
-     diagnose more target bugs.  If you have an out-of-tree backend, you may
-     find that it finds bugs in your target description.  This support also
-     allows limited support for writing patterns for instructions that return
-     multiple results (e.g. a virtual register and a flag result).  The 
-     'parallel' modifier in tblgen was removed, you should use the new support
-     for multiple results instead.</li>
-<li>A new (experimental) "-rendermf" pass is available which renders a
-    MachineFunction into HTML, showing live ranges and other useful
-    details.</li>
-<li>The new SubRegIndex tablegen class allows subregisters to be indexed
-    symbolically instead of numerically.  If your target uses subregisters you
-    will need to adapt to use SubRegIndex when you upgrade to 2.8.</li>
-<!-- SplitKit -->
-
-<li>The -fast-isel instruction selection path (used at -O0 on X86) was rewritten
-    to work bottom-up on basic blocks instead of top down.  This makes it
-    slightly faster (because the MachineDCE pass is not needed any longer) and
-    allows it to generate better code in some cases.</li>
+<li>The pre-register-allocation (preRA) instruction scheduler models register
+  pressure much more accurately in some cases. This allows the adoption of more
+  aggressive scheduling heuristics without causing spills to be generated.
+</li>
+  
+<li>LiveDebugVariables is a new pass that keeps track of debugging information
+  for user variables that are promoted to registers in optimized builds.</li>  
+
+<li>The scheduler now models operand latency and pipeline forwarding.</li>
 
+<li>A major register allocator infrastructure rewrite is underway.  It is not on
+    by default for 2.9 and you are not advised to use it, but it has made
+    substantial progress in the 2.9 timeframe:
+  <ul>
+  <li>A new -regalloc=basic "basic" register allocator can be used as a simple
+      fallback when debugging.  It uses the new infrastructure.</li>
+  <li>New infrastructure is in place for live range splitting.  "SplitKit" can
+      break a live interval into smaller pieces while preserving SSA form, and
+      SpillPlacement can help find the best split points. This is a work in
+      progress so the API is changing quickly.</li>
+   <li>The inline spiller has learned to clean up after live range splitting. It
+      can hoist spills out of loops, and it can eliminate redundant spills.</li>
+   <li>Rematerialization works with live range splitting.</li>
+   <li>The new "greedy" register allocator using live range splitting. This will
+     be the default register allocator in the next LLVM release, but it is not
+     turned on by default in 2.9.</li>
+   </ul>
+</li>
 </ul>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="x86">X86-32 and X86-64 Target Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>New features and major changes in the X86 target include:
 </p>
 
 <ul>
-<li>The X86 backend now supports holding X87 floating point stack values
-    in registers across basic blocks, dramatically improving performance of code
-    that uses long double, and when targeting CPUs that don't support SSE.</li>
-
-<li>The X86 backend now uses a SSEDomainFix pass to optimize SSE operations.  On
-    Nehalem ("Core i7") and newer CPUs there is a 2 cycle latency penalty on
-    using a register in a different domain than where it was defined. This pass
-    optimizes away these stalls.</li>
-
-<li>The X86 backend now promotes 16-bit integer operations to 32-bits when
-    possible. This avoids 0x66 prefixes, which are slow on some
-    microarchitectures and bloat the code on all of them.</li>
-
-<li>The X86 backend now supports the Microsoft "thiscall" calling convention,
-    and a <a href="LangRef.html#callingconv">calling convention</a> to support
-    <a href="#GHC">ghc</a>.</li>
-
-<li>The X86 backend supports a new "llvm.x86.int" intrinsic, which maps onto
-    the X86 "int $42" and "int3" instructions.</li>
-
-<li>At the IR level, the &lt;2 x float&gt; datatype is now promoted and passed
-    around as a &lt;4 x float&gt; instead of being passed and returned as an MMX
-    vector.  If you have a frontend that uses this, please pass and return a
-    &lt;2 x i32&gt; instead (using bitcasts).</li>
-
-<li>When printing .s files in verbose assembly mode (the default for clang -S),
-    the X86 backend now decodes X86 shuffle instructions and prints human
-    readable comments after the most inscrutable of them, e.g.:
-    
-<pre>
-  insertps $113, %xmm3, %xmm0 <i># xmm0 = zero,xmm0[1,2],xmm3[1]</i>
-  unpcklps %xmm1, %xmm0       <i># xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]</i>
-  pshufd   $1, %xmm1, %xmm1   <i># xmm1 = xmm1[1,0,0,0]</i>
-</pre>
+<li>LLVM 2.9 includes a complete reimplementation of the MMX instruction set.
+  The reimplementation uses a new LLVM IR <a 
+  href="LangRef.html#t_x86mmx">x86_mmx</a> type to ensure that MMX operations
+  are <em>only</em> generated from source that uses MMX builtin operations. With
+  this, random types like &lt;2 x i32&gt; are not turned into MMX operations
+  (which can be catastrophic without proper "emms" insertion).  Because the X86
+  code generator always generates reliable code, the -disable-mmx flag is now
+  removed.
+</li>
+  
+<li>X86 support for FS/GS relative loads and stores using <a 
+    href="CodeGenerator.html#x86_memory">address space 256/257</a> works reliably
+    now.</li>
+  
+<li>LLVM 2.9 generates much better code in several cases by using adc/sbb to
+   avoid generation of conditional move instructions for conditional increment
+   and other idioms.</li>
+
+<li>The X86 backend has adopted a new preRA scheduling mode, "list-ilp", to
+  shorten the height of instruction schedules without inducing register spills.
 </li>
-        
+
+<li>The MC assembler supports 3dNow! and 3DNowA instructions.</li>
+  
+<li>Several bugs have been fixed for Windows x64 code generator.</li>
 </ul>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="ARM">ARM Target Improvements</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>New features of the ARM target include:
 </p>
 
 <ul>
-<li>The ARM backend now optimizes tail calls into jumps.</li>
-<li>Scheduling is improved through the new list-hybrid scheduler as well
-    as through better modeling of structural hazards.</li>
-<li><a href="LangRef.html#int_fp16">Half float</a> instructions are now
-    supported.</li>
-<li>NEON support has been improved to model instructions which operate onto 
-    multiple consecutive registers more aggressively.  This avoids lots of
-    extraneous register copies.</li>
-<li>The ARM backend now uses a new "ARMGlobalMerge" pass, which merges several
-    global variables into one, saving extra address computation (all the global
-    variables can be accessed via same base address) and potentially reducing
-    register pressure.</li>
-
-<li>The ARM backend has received many minor improvements and tweaks which lead
-    to substantially better performance in a wide range of different scenarios.
-</li>
+<li>The ARM backend now has a fast instruction selector, which dramatically
+     improves -O0 compile times.</li>
+<li>The ARM backend has new tuning for Cortex-A8 and Cortex-A9 CPUs.</li>
+<li>The __builtin_prefetch builtin (and llvm.prefetch intrinsic) is compiled
+    into prefetch instructions instead of being discarded.</li>
 
-<li>The ARM NEON intrinsics have been substantially reworked to reduce
-    redundancy and improve code generation.  Some of the major changes are:
-  <ol>
-  <li>
-    All of the NEON load and store intrinsics (llvm.arm.neon.vld* and
-    llvm.arm.neon.vst*) take an extra parameter to specify the alignment in bytes
-    of the memory being accessed.
-  </li>
-  <li>
-    The llvm.arm.neon.vaba intrinsic (vector absolute difference and
-    accumulate) has been removed.  This operation is now represented using
-    the llvm.arm.neon.vabd intrinsic (vector absolute difference) followed by a
-    vector add.
-  </li>
-  <li>
-    The llvm.arm.neon.vabdl and llvm.arm.neon.vabal intrinsics (lengthening
-    vector absolute difference with and without accumulation) have been removed.
-    They are represented using the llvm.arm.neon.vabd intrinsic (vector absolute
-    difference) followed by a vector zero-extend operation, and for vabal,
-    a vector add.
-  </li>
-  <li>
-    The llvm.arm.neon.vmovn intrinsic has been removed.  Calls of this intrinsic
-    are now replaced by vector truncate operations.
-  </li>
-  <li>
-    The llvm.arm.neon.vmovls and llvm.arm.neon.vmovlu intrinsics have been
-    removed.  They are now represented as vector sign-extend (vmovls) and
-    zero-extend (vmovlu) operations.
-  </li>
-  <li>
-    The llvm.arm.neon.vaddl*, llvm.arm.neon.vaddw*, llvm.arm.neon.vsubl*, and
-    llvm.arm.neon.vsubw* intrinsics (lengthening vector add and subtract) have
-    been removed.  They are replaced by vector add and vector subtract operations
-    where one (vaddw, vsubw) or both (vaddl, vsubl) of the operands are either
-    sign-extended or zero-extended.
-  </li>
-  <li>
-    The llvm.arm.neon.vmulls, llvm.arm.neon.vmullu, llvm.arm.neon.vmlal*, and
-    llvm.arm.neon.vmlsl* intrinsics (lengthening vector multiply with and without
-    accumulation and subtraction) have been removed.  These operations are now
-    represented as vector multiplications where the operands are either
-    sign-extended or zero-extended, followed by a vector add for vmlal or a
-    vector subtract for vmlsl.  Note that the polynomial vector multiply
-    intrinsic, llvm.arm.neon.vmullp, remains unchanged.
-  </li>
-  </ol>
-</li>
+<li>  The ARM backend preRA scheduler now models machine resources at cycle
+  granularity. This allows the scheduler to both accurately model
+  instruction latency and avoid overcommitting functional units.</li>
 
+<li>Countless ARM microoptimizations have landed in LLVM 2.9.</li>
 </ul>
 </div>
+  
+<!--=========================================================================-->
+<h3>
+<a name="OtherTS">Other Target Specific Improvements</a>
+</h3>
+
+<div>
+<ul>
+<li>MicroBlaze: major updates for aggressive delay slot filler, MC-based
+  assembly printing, assembly instruction parsing, ELF .o file emission, and MC
+  instruction disassembler have landed.</li>
 
+<li>SPARC: Many improvements, including using the Y registers for
+  multiplications and addition of a simple delay slot filler.</li>
+
+<li>PowerPC: The backend has been largely MC'ized and is ready to support
+  directly writing out mach-o object files.  No one seems interested in finishing
+  this final step though.</li>
+
+<li>Mips: Improved o32 ABI support, including better varags handling.
+More instructions supported in codegen: madd, msub, rotr, rotrv and clo.
+It also now supports lowering block addresses.</li>
+
+</ul>
+</div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
+<h3>
 <a name="changes">Major Changes and Removed Features</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>If you're already an LLVM user or developer with out-of-tree changes based
-on LLVM 2.7, this section lists some "gotchas" that you may run into upgrading
+on LLVM 2.8, this section lists some "gotchas" that you may run into upgrading
 from the previous release.</p>
 
 <ul>
-<li>The build configuration machinery changed the output directory names.  It
-    wasn't clear to many people that a "Release-Asserts" build was a release build
-    without asserts.  To make this more clear, "Release" does not include
-    assertions and "Release+Asserts" does (likewise, "Debug" and
-    "Debug+Asserts").</li>
-<li>The MSIL Backend was removed, it was unsupported and broken.</li>
-<li>The ABCD, SSI, and SCCVN passes were removed.  These were not fully
-    functional and their behavior has been or will be subsumed by the
-    LazyValueInfo  pass.</li>
-<li>The LLVM IR 'Union' feature was removed.  While this is a desirable feature
-    for LLVM IR to support, the existing implementation was half baked and
-    barely useful.  We'd really like anyone interested to resurrect the work and
-    finish it for a future release.</li>
-<li>If you're used to reading .ll files, you'll probably notice that .ll file
-    dumps don't produce #uses comments anymore.  To get them, run a .bc file
-    through "llvm-dis --show-annotations".</li>
-<li>Target triples are now stored in a normalized form, and all inputs from
-    humans are expected to be normalized by Triple::normalize before being
-    stored in a module triple or passed to another library.</li>
-</ul>
+<li><b>This is the last release to support the llvm-gcc frontend.</b></li>
 
+<li>LLVM has a new <a href="CodingStandards.html#ll_naming">naming
+  convention standard</a>, though the codebase hasn't fully adopted it yet.</li>
+  
+<li>The new DIBuilder class provides a simpler interface for front ends to
+    encode debug info in LLVM IR, and has replaced DIFactory.</li>
 
+<li>LLVM IR and other tools always work on normalized target triples (which have
+  been run through <tt>Triple::normalize</tt>).</li>
 
-<p>In addition, many APIs have changed in this release.  Some of the major LLVM
-API changes are:</p>
-<ul>
-<li>LLVM 2.8 changes the internal order of operands in <a
-  href="http://llvm.org/doxygen/classllvm_1_1InvokeInst.html"><tt>InvokeInst</tt></a>
-  and <a href="http://llvm.org/doxygen/classllvm_1_1CallInst.html"><tt>CallInst</tt></a>.
-  To be portable across releases, please use the <tt>CallSite</tt> class and the
-  high-level accessors, such as <tt>getCalledValue</tt> and
-  <tt>setUnwindDest</tt>.
-</li>
-<li>
-  You can no longer pass use_iterators directly to cast&lt;&gt; (and similar),
-  because these routines tend to perform costly dereference operations more
-  than once. You have to dereference the iterators yourself and pass them in.
-</li>
-<li>
-  llvm.memcpy.*, llvm.memset.*, llvm.memmove.* intrinsics take an extra
-  parameter now ("i1 isVolatile"), totaling 5 parameters, and the pointer
-  operands are now address-space qualified.
-  If you were creating these intrinsic calls and prototypes yourself (as opposed
-  to using Intrinsic::getDeclaration), you can use
-  UpgradeIntrinsicFunction/UpgradeIntrinsicCall to be portable across releases.
-</li>
-<li>
-  SetCurrentDebugLocation takes a DebugLoc now instead of a MDNode.
-  Change your code to use
-  SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)).
-</li>
-<li>
-  The <tt>RegisterPass</tt> and <tt>RegisterAnalysisGroup</tt> templates are
-  considered deprecated, but continue to function in LLVM 2.8.  Clients are  
-  strongly advised to use the upcoming <tt>INITIALIZE_PASS()</tt> and
-  <tt>INITIALIZE_AG_PASS()</tt> macros instead.
-</li>
-<li>
-  The constructor for the Triple class no longer tries to understand odd triple
-  specifications.  Frontends should ensure that they only pass valid triples to
-  LLVM.  The Triple::normalize utility method has been added to help front-ends
-  deal with funky triples.
-</li>
-<li>
-  The signature of the <tt>GCMetadataPrinter::finishAssembly</tt> virtual
-  function changed: the <tt>raw_ostream</tt> and <tt>MCAsmInfo</tt> arguments
-  were dropped.  GC plugins which compute stack maps must be updated to avoid
-  having the old definition overload the new signature.
-</li>
-<li>
-  The signature of <tt>MemoryBuffer::getMemBuffer</tt> changed.  Unfortunately
-  calls intended for the old version still compile, but will not work correctly,
-  leading to a confusing error about an invalid header in the bitcode.
-</li>
-  
-<li>
-  Some APIs were renamed:
-  <ul>
-  <li>llvm_report_error -&gt; report_fatal_error</li>
-  <li>llvm_install_error_handler -&gt; install_fatal_error_handler</li>
-  <li>llvm::DwarfExceptionHandling -&gt; llvm::JITExceptionHandling</li>
-  <li>VISIBILITY_HIDDEN -&gt; LLVM_LIBRARY_VISIBILITY</li>
-  </ul>
-</li>
+<li>The target triple x86_64--mingw64 is obsoleted. Use x86_64--mingw32 
+  instead.</li>
 
-<li>
-  Some public headers were renamed:
-  <ul>
-    <li><tt>llvm/Assembly/AsmAnnotationWriter.h</tt> was renamed
-    to <tt>llvm/Assembly/AssemblyAnnotationWriter.h</tt>
-    </li>
-  </ul>
+<li>The PointerTracking pass has been removed from mainline, and moved to The
+  ClamAV project (its only client).</li>
+    
+<li>The LoopIndexSplit, LiveValues, SimplifyHalfPowrLibCalls, GEPSplitter, and
+  PartialSpecialization passes were removed.  They were unmaintained,
+  buggy, or deemed to be a bad idea.</li>
 </ul>
 
 </div>
 
 <!--=========================================================================-->
-<div class="doc_subsection">
-<a name="devtree_changes">Development Infrastructure Changes</a>
-</div>
+<h3>
+<a name="api_changes">Internal API Changes</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
-<p>This section lists changes to the LLVM development infrastructure. This
-mostly impacts users who actively work on LLVM or follow development on
-mainline, but may also impact users who leverage the LLVM build infrastructure
-or are interested in LLVM qualification.</p>
+<p>In addition, many APIs have changed in this release.  Some of the major
+  LLVM API changes are:</p>
 
 <ul>
-  <li>The default for <tt>make check</tt> is now to use
-  the <a href="http://llvm.org/cmds/lit.html">lit</a> testing tool, which is
-  part of LLVM itself. You can use <tt>lit</tt> directly as well, or use
-  the <tt>llvm-lit</tt> tool which is created as part of a Makefile or CMake
-  build (and knows how to find the appropriate tools). See the <tt>lit</tt>
-  documentation and the <a href="http://blog.llvm.org/2009/12/lit-it.html">blog
-  post</a>, and <a href="http://llvm.org/bugs/show_bug.cgi?id=5217">PR5217</a>
-  for more information.</li>
-
-  <li>The LLVM <tt>test-suite</tt> infrastructure has a new "simple" test format
-  (<tt>make TEST=simple</tt>). The new format is intended to require only a
-  compiler and not a full set of LLVM tools. This makes it useful for testing
-  released compilers, for running the test suite with other compilers (for
-  performance comparisons), and makes sure that we are testing the compiler as
-  users would see it. The new format is also designed to work using reference
-  outputs instead of comparison to a baseline compiler, which makes it run much
-  faster and makes it less system dependent.</li>
-
-  <li>Significant progress has been made on a new interface to running the
-  LLVM <tt>test-suite</tt> (aka the LLVM "nightly tests") using
-  the <a href="http://llvm.org/docs/lnt">LNT</a> infrastructure. The LNT
-  interface to the <tt>test-suite</tt> brings significantly improved reporting
-  capabilities for monitoring the correctness and generated code quality
-  produced by LLVM over time.</li>
+<li>include/llvm/System merged into include/llvm/Support.</li>
+<li>The <a href="http://llvm.org/PR5207">llvm::APInt API</a> was significantly
+  cleaned up.</li>
+
+<li>In the code generator, MVT::Flag was renamed to MVT::Glue to more accurately
+  describe its behavior.</li>
+
+<li>The system_error header from C++0x was added, and is now pervasively used to
+  capture and handle i/o and other errors in LLVM.</li>
+  
+<li>The old sys::Path API has been deprecated in favor of the new PathV2 API,
+    which is more efficient and flexible.</li>
 </ul>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="knownproblems">Known Problems</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This section contains significant known problems with the LLVM system,
 listed by component.  If you run into a problem, please check the <a
 href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
 there isn't already one.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="experimental">Experimental features included with this release</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The following components of this LLVM release are either untested, known to
 be broken or unreliable, or are in early development.  These components should
@@ -1114,43 +841,54 @@ components, please contact us on the <a
 href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
 
 <ul>
-<li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, SystemZ
+<li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PTX, SystemZ
     and XCore backends are experimental.</li>
 <li><tt>llc</tt> "<tt>-filetype=obj</tt>" is experimental on all targets
-    other than darwin-i386 and darwin-x86_64.</li>
+    other than darwin and ELF X86 systems.</li>
+    
 </ul>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="x86-be">Known problems with the X86 back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
   <li>The X86 backend does not yet support
     all <a href="http://llvm.org/PR879">inline assembly that uses the X86
     floating point stack</a>.  It supports the 'f' and 't' constraints, but not
     'u'.</li>
-  <li>Win64 code generation wasn't widely tested. Everything should work, but we
-    expect small issues to happen. Also, llvm-gcc cannot build the mingw64
-    runtime currently due to lack of support for the 'u' inline assembly
-    constraint and for X87 floating point inline assembly.</li>
   <li>The X86-64 backend does not yet support the LLVM IR instruction
       <tt>va_arg</tt>. Currently, front-ends support variadic
       argument constructs on X86-64 by lowering them manually.</li>
+  <li>Windows x64 (aka Win64) code generator has a few issues.
+    <ul>
+      <li>llvm-gcc cannot build the mingw-w64 runtime currently
+       due to lack of support for the 'u' inline assembly
+       constraint and for X87 floating point inline assembly.</li>
+      <li>On mingw-w64, you will see unresolved symbol <tt>__chkstk</tt>
+       due to <a href="http://llvm.org/bugs/show_bug.cgi?id=8919">Bug 8919</a>.
+       It is fixed in <a href="http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20110321/118499.html">r128206</a>.</li>
+      <li>Miss-aligned MOVDQA might crash your program. It is due to
+       <a href="http://llvm.org/bugs/show_bug.cgi?id=9483">Bug 9483</a>,
+       lack of handling aligned internal globals.</li>
+      </ul>
+  </li>
+
 </ul>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ppc-be">Known problems with the PowerPC back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li>The Linux PPC32/ABI support needs testing for the interpreter and static
@@ -1160,11 +898,11 @@ compilation, and lacks support for debug information.</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="arm-be">Known problems with the ARM back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
@@ -1177,11 +915,11 @@ results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="sparc-be">Known problems with the SPARC back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li>The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not
@@ -1191,11 +929,11 @@ results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mips-be">Known problems with the MIPS back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 <li>64-bit MIPS targets are not supported yet.</li>
@@ -1204,11 +942,11 @@ results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="alpha-be">Known problems with the Alpha back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <ul>
 
@@ -1219,11 +957,11 @@ appropriate nops inserted to ensure restartability.</li>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="c-be">Known problems with the C back-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The C backend has numerous problems and is not being actively maintained.
 Depending on it for anything serious is not advised.</p>
@@ -1242,11 +980,13 @@ Depending on it for anything serious is not advised.</p>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="llvm-gcc">Known problems with the llvm-gcc front-end</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
+
+<p><b>LLVM 2.9 will be the last release of llvm-gcc.</b></p>
 
 <p>llvm-gcc is generally very stable for the C family of languages.  The only
    major language feature of GCC not supported by llvm-gcc is the
@@ -1267,16 +1007,18 @@ actively maintained.  If you are interested in Ada, we recommend that you
 consider using <a href="#dragonegg">dragonegg</a> instead.</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="additionalinfo">Additional Information</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>A wide variety of additional information is available on the <a
-href="http://llvm.org">LLVM web page</a>, in particular in the <a
+href="http://llvm.org/">LLVM web page</a>, in particular in the <a
 href="http://llvm.org/docs/">documentation</a> section.  The web page also
 contains versions of the API documentation which is up-to-date with the
 Subversion version of the source code.
@@ -1299,7 +1041,7 @@ lists</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-10-26 14:43:36 +0200 (Tue, 26 Oct 2010) $
+  Last modified: $Date: 2011-04-21 03:52:00 +0200 (Thu, 21 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 186ea4a..4cfb52e 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -8,7 +8,7 @@
 </head>
 <body>
 
-<div class="doc_title">Source Level Debugging with LLVM</div>
+<h1>Source Level Debugging with LLVM</h1>
 
 <table class="layout" style="width:100%">
   <tr class="layout">
@@ -68,10 +68,10 @@ height="369">
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction</a></div> 
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This document is the central repository for all information pertaining to
    debug information in LLVM.  It describes the <a href="#format">actual format
@@ -80,14 +80,12 @@ height="369">
    Further, this document provides specific examples of what debug information
    for C/C++ looks like.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="phil">Philosophy behind LLVM debugging information</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The idea of the LLVM debugging information is to capture how the important
    pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
@@ -133,11 +131,11 @@ height="369">
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="consumers">Debug information consumers</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The role of debug information is to provide meta information normally
    stripped away during the compilation process.  This meta information provides
@@ -157,11 +155,11 @@ height="369">
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="debugopt">Debugging optimized code</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>An extremely high priority of LLVM debugging information is to make it
    interact well with optimizations and analysis.  In particular, the LLVM debug
@@ -226,13 +224,15 @@ height="369">
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="format">Debugging information format</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM debugging information has been carefully designed to make it possible
    for the optimizer to optimize the program and debugging information without
@@ -265,14 +265,12 @@ height="369">
    common to any source-language.  The <a href="#ccxx_frontend">next section</a>
    describes the data layout conventions used by the C and C++ front-ends.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="debug_info_descriptors">Debug information descriptors</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>In consideration of the complexity and volume of debug information, LLVM
    provides a specification for well formed debug descriptors. </p>
@@ -312,14 +310,12 @@ height="369">
 
 <p>The details of the various descriptors follow.</p>  
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_compile_units">Compile unit descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -351,11 +347,11 @@ height="369">
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_files">File descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -380,11 +376,11 @@ height="369">
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_global_variables">Global variable descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -407,16 +403,17 @@ height="369">
 </div>
 
 <p>These descriptors provide debug information about globals variables.  The
-provide details such as name, type and where the variable is defined.</p>
+provide details such as name, type and where the variable is defined. All
+global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_subprograms">Subprogram descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -432,30 +429,35 @@ provide details such as name, type and where the variable is defined.</p>
   i32,      ;; Line number where defined
   metadata, ;; Reference to type descriptor
   i1,       ;; True if the global is local to compile unit (static)
-  i1        ;; True if the global is defined in the compile unit (not extern)
-  i32       ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
-  i32       ;; Index into a virtual function
+  i1,       ;; True if the global is defined in the compile unit (not extern)
+  i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
+  i32,      ;; Index into a virtual function
   metadata, ;; indicates which base type contains the vtable pointer for the 
             ;; derived class
-  i1        ;; isArtificial
-  i1        ;; isOptimized
-  Function *;; Pointer to LLVM function
+  i1,       ;; isArtificial
+  i1,       ;; isOptimized
+  Function *,;; Pointer to LLVM function
+  metadata, ;; Lists function template parameters
+  metadata  ;; Function declaration descriptor
 }
 </pre>
 </div>
 
 <p>These descriptors provide debug information about functions, methods and
    subprograms.  They provide details such as name, return types and the source
-   location where the subprogram is defined.</p>
+   location where the subprogram is defined.
+   All subprogram descriptors are collected by a named metadata 
+   <tt>!llvm.dbg.sp</tt>.
+</p>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_blocks">Block descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -463,7 +465,9 @@ provide details such as name, type and where the variable is defined.</p>
   i32,     ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
   metadata,;; Reference to context descriptor
   i32,     ;; Line number
-  i32      ;; Column number
+  i32,     ;; Column number
+  metadata,;; Reference to source file
+  i32      ;; Unique ID to identify blocks from a template function
 }
 </pre>
 </div>
@@ -475,11 +479,11 @@ provide details such as name, type and where the variable is defined.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_basic_type">Basic type descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -527,11 +531,11 @@ DW_ATE_unsigned_char = 8
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_derived_type">Derived type descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -544,7 +548,12 @@ DW_ATE_unsigned_char = 8
   i64,      ;; Size in bits
   i64,      ;; Alignment in bits
   i64,      ;; Offset in bits
-  metadata  ;; Reference to type derived from
+  metadata, ;; Reference to type derived from
+  metadata, ;; (optional) Name of the Objective C property assoicated with 
+            ;; Objective-C an ivar 
+  metadata, ;; (optional) Name of the Objective C property getter selector.
+  metadata, ;; (optional) Name of the Objective C property setter selector.
+  i32       ;; (optional) Objective C property attributes.
 }
 </pre>
 </div>
@@ -594,11 +603,11 @@ DW_TAG_restrict_type    = 55
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_composite_type">Composite type descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -644,7 +653,8 @@ DW_TAG_inheritance      = 28
 
 <p>The members of enumeration types (tag = <tt>DW_TAG_enumeration_type</tt>) are
    <a href="#format_enumeration">enumerator descriptors</a>, each representing
-   the definition of enumeration value for the set.</p>
+   the definition of enumeration value for the set. All enumeration type
+   descriptors are collected by named metadata <tt>!llvm.dbg.enum</tt>.</p>
 
 <p>The members of structure (tag = <tt>DW_TAG_structure_type</tt>) or union (tag
    = <tt>DW_TAG_union_type</tt>) types are any one of
@@ -680,11 +690,11 @@ DW_TAG_inheritance      = 28
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_subrange">Subrange descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -700,16 +710,17 @@ DW_TAG_inheritance      = 28
    <a href="#format_composite_type">composite type</a>.  The low value defines
    the lower bounds typically zero for C/C++.  The high value is the upper
    bounds.  Values are 64 bit.  High - low + 1 is the size of the array.  If low
-   == high the array will be unbounded.</p>
+   > high the array bounds are not included in generated debugging information.
+</p>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_enumeration">Enumerator descriptors</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -729,11 +740,11 @@ DW_TAG_inheritance      = 28
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_variables">Local variables</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -742,7 +753,8 @@ DW_TAG_inheritance      = 28
   metadata, ;; Context
   metadata, ;; Name
   metadata, ;; Reference to file where defined
-  i32,      ;; Line number where defined
+  i32,      ;; 24 bit - Line number where defined
+            ;; 8 bit - Argument number. 1 indicates 1st argument.
   metadata  ;; Type descriptor
 }
 </pre>
@@ -771,39 +783,39 @@ DW_TAG_return_variable = 258
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="format_common_intrinsics">Debugger intrinsic functions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to
    provide debug information at various points in generated code.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_common_declare">llvm.dbg.declare</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <pre>
   void %<a href="#format_common_declare">llvm.dbg.declare</a>(metadata, metadata)
 </pre>
 
 <p>This intrinsic provides information about a local element (ex. variable.) The
-   first argument is metadata holding alloca for the variable.</tt>. The
+   first argument is metadata holding alloca for the variable. The
    second argument is metadata containing description of the variable. </p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="format_common_value">llvm.dbg.value</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <pre>
   void %<a href="#format_common_value">llvm.dbg.value</a>(metadata, i64, metadata)
 </pre>
@@ -815,12 +827,14 @@ DW_TAG_return_variable = 258
    user source variable. </p>
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="format_common_lifetime">Object lifetimes and scoping</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 <p>In many languages, the local variables in functions can have their lifetimes
    or scopes limited to a subset of a function.  In the C family of languages,
    for example, variables are only live (readable and writable) within the
@@ -978,13 +992,15 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="ccxx_frontend">C/C++ front-end specific debug information</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The C and C++ front-ends represent information about the program in a format
    that is effectively identical
@@ -1005,14 +1021,12 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
 <p>The following sections provide examples of various C/C++ constructs and the
    debug information that would best describe those constructs.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_compile_units">C/C++ source file information</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given the source files <tt>MySource.cpp</tt> and <tt>MyHeader.h</tt> located
    in the directory <tt>/Users/mine/sources</tt>, the following code:</p>
@@ -1086,11 +1100,11 @@ using <tt>Instruction::getMetadata()</tt> and
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_global_variable">C/C++ global variable information</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given an integer global variable declared as follows:</p>
 
@@ -1156,11 +1170,11 @@ int MyGlobal = 100;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_subprogram">C/C++ function information</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given a function declared as follows:</p>
 
@@ -1192,7 +1206,14 @@ int main(int argc, char *argv[]) {
   i32 1,             ;; Line number
   metadata !4,       ;; Type
   i1 false,          ;; Is local 
-  i1 true            ;; Is definition
+  i1 true,           ;; Is definition
+  i32 0,             ;; Virtuality attribute, e.g. pure virtual function
+  i32 0,             ;; Index into virtual table for C++ methods
+  i32 0,             ;; Type that holds virtual table.
+  i32 0,             ;; Flags
+  i1 false,          ;; True if this function is optimized
+  Function *,        ;; Pointer to llvm::Function
+  null               ;; Function template parameters
 }
 ;;
 ;; Define the subprogram itself.
@@ -1206,22 +1227,20 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_basic_types">C/C++ basic types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The following are the basic type descriptors for C/C++ core types:</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_type_bool">bool</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1243,11 +1262,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_char">char</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1269,11 +1288,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_unsigned_char">unsigned char</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1295,11 +1314,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_short">short</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1321,11 +1340,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_unsigned_short">unsigned short</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1347,11 +1366,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_int">int</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1372,11 +1391,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_unsigned_int">unsigned int</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1398,11 +1417,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_long_long">long long</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1424,11 +1443,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_unsigned_long_long">unsigned long long</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1450,11 +1469,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_float">float</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1476,11 +1495,11 @@ define i32 @main(i32 %argc, i8** %argv) {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection">
+<h4>
   <a name="ccxx_basic_double">double</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code">
 <pre>
@@ -1501,12 +1520,14 @@ define i32 @main(i32 %argc, i8** %argv) {
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_derived_types">C/C++ derived types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given the following as an example of C/C++ derived type:</p>
 
@@ -1587,11 +1608,11 @@ typedef const int *IntPtr;
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_composite_types">C/C++ struct/union types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given the following as an example of C/C++ struct type:</p>
 
@@ -1700,11 +1721,11 @@ struct Color {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ccxx_enumeration_types">C/C++ enumeration types</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Given the following as an example of C/C++ enumeration type:</p>
 
@@ -1765,6 +1786,8 @@ enum Trees {
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -1775,8 +1798,8 @@ enum Trees {
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-03 01:22:17 +0100 (Thu, 03 Feb 2011) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
index b81b1a8..57dc239 100644
--- a/docs/SystemLibrary.html
+++ b/docs/SystemLibrary.html
@@ -7,7 +7,7 @@
 </head>
 <body>
 
-<div class="doc_title">System Library</div>
+<h1>System Library</h1>
 <ul>
   <li><a href="#abstract">Abstract</a></li>
   <li><a href="#requirements">Keeping LLVM Portable</a>
@@ -36,8 +36,8 @@
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="abstract">Abstract</a></div>
-<div class="doc_text">
+<h2><a name="abstract">Abstract</a></h2>
+<div>
   <p>This document provides some details on LLVM's System Library, located in
   the source at <tt>lib/System</tt> and <tt>include/llvm/System</tt>. The
   library's purpose is to shield LLVM from the differences between operating
@@ -63,21 +63,19 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="requirements">Keeping LLVM Portable</a>
-</div>
-<div class="doc_text">
+</h2>
+<div>
   <p>In order to keep LLVM portable, LLVM developers should adhere to a set of
   portability rules associated with the System Library. Adherence to these rules
   should help the System Library achieve its goal of shielding LLVM from the
   variations in operating system interfaces and doing so efficiently.  The 
   following sections define the rules needed to fulfill this objective.</p>
-</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="headers">Don't Include System Headers</a>
-</div>
-<div class="doc_text">
+<h3><a name="headers">Don't Include System Headers</a></h3>
+<div>
   <p>Except in <tt>lib/System</tt>, no LLVM source code should directly
   <tt>#include</tt> a system header. Care has been taken to remove all such
   <tt>#includes</tt> from LLVM while <tt>lib/System</tt> was being
@@ -91,9 +89,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="expose">Don't Expose System Headers</a>
-</div>
-<div class="doc_text">
+<h3><a name="expose">Don't Expose System Headers</a></h3>
+<div>
   <p>The System Library must shield LLVM from <em>all</em> system headers. To 
   obtain system level functionality, LLVM source must 
   <tt>#include "llvm/System/Thing.h"</tt> and nothing else. This means that 
@@ -103,8 +100,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="c_headers">Use Standard C Headers</a></div>
-<div class="doc_text">
+<h3><a name="c_headers">Use Standard C Headers</a></h3>
+<div>
   <p>The <em>standard</em> C headers (the ones beginning with "c") are allowed
   to be exposed through the <tt>lib/System</tt> interface. These headers and 
   the things they declare are considered to be platform agnostic. LLVM source 
@@ -113,9 +110,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="cpp_headers">Use Standard C++ Headers</a>
-</div>
-<div class="doc_text">
+<h3><a name="cpp_headers">Use Standard C++ Headers</a></h3>
+<div>
   <p>The <em>standard</em> C++ headers from the standard C++ library and
   standard template library may be exposed through the <tt>lib/System</tt>
   interface. These headers and the things they declare are considered to be
@@ -124,8 +120,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="highlev">High Level Interface</a></div>
-<div class="doc_text">
+<h3><a name="highlev">High Level Interface</a></h3>
+<div>
   <p>The entry points specified in the interface of lib/System must be aimed at 
   completing some reasonably high level task needed by LLVM. We do not want to
   simply wrap each operating system call. It would be preferable to wrap several
@@ -143,8 +139,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="nounused">No Unused Functionality</a></div>
-<div class="doc_text">
+<h3><a name="nounused">No Unused Functionality</a></h3>
+<div>
   <p>There must be no functionality specified in the interface of lib/System 
   that isn't actually used by LLVM. We're not writing a general purpose
   operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM
@@ -153,9 +149,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="nodupl">No Duplicate Implementations</a>
-</div>
-<div class="doc_text">
+<h3><a name="nodupl">No Duplicate Implementations</a></h3>
+<div>
   <p>The implementation of a function for a given platform must be written
   exactly once. This implies that it must be possible to apply a function's 
   implementation to multiple operating systems if those operating systems can
@@ -165,8 +160,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="virtuals">No Virtual Methods</a></div>
-<div class="doc_text">
+<h3><a name="virtuals">No Virtual Methods</a></h3>
+<div>
   <p>The System Library interfaces can be called quite frequently by LLVM. In
   order to make those calls as efficient as possible, we discourage the use of
   virtual methods. There is no need to use inheritance for implementation
@@ -175,8 +170,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="nofunc">No Exposed Functions</a></div>
-<div class="doc_text">
+<h3><a name="nofunc">No Exposed Functions</a></h3>
+<div>
   <p>Any functions defined by system libraries (i.e. not defined by lib/System) 
   must not be exposed through the lib/System interface, even if the header file 
   for that function is not exposed. This prevents inadvertent use of system
@@ -191,8 +186,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="nodata">No Exposed Data</a></div>
-<div class="doc_text">
+<h3><a name="nodata">No Exposed Data</a></h3>
+<div>
   <p>Any data defined by system libraries (i.e. not defined by lib/System) must
   not be exposed through the lib/System interface, even if the header file for
   that function is not exposed. As with functions, this prevents inadvertent use
@@ -200,8 +195,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="softerrors">Minimize Soft Errors</a></div>
-<div class="doc_text">
+<h3><a name="softerrors">Minimize Soft Errors</a></h3>
+<div>
   <p>Operating system interfaces will generally provide error results for every
   little thing that could go wrong. In almost all cases, you can divide these
   error results into two groups: normal/good/soft and abnormal/bad/hard. That
@@ -239,9 +234,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="throw_spec">No throw Specifications</a>
-</div>
-<div class="doc_text">
+<h3><a name="throw_spec">No throw Specifications</a></h3>
+<div>
   <p>None of the lib/System interface functions may be declared with C++ 
   <tt>throw()</tt> specifications on them. This requirement makes sure that the
   compiler does not insert additional exception handling code into the interface
@@ -252,8 +246,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="organization">Code Organization</a></div>
-<div class="doc_text">
+<h3><a name="organization">Code Organization</a></h3>
+<div>
   <p>Implementations of the System Library interface are separated by their
   general class of operating system. Currently only Unix and Win32 classes are
   defined but more could be added for other operating system classifications.
@@ -281,8 +275,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="semantics">Consistent Semantics</a></div>
-<div class="doc_text">
+<h3><a name="semantics">Consistent Semantics</a></h3>
+<div>
   <p>The implementation of a lib/System interface can vary drastically between
   platforms. That's okay as long as the end result of the interface function 
   is the same. For example, a function to create a directory is pretty straight
@@ -296,12 +290,14 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="bug">Bug 351</a></div>
-<div class="doc_text">
+<h3><a name="bug">Bug 351</a></h3>
+<div>
   <p>See <a href="http://llvm.org/PR351">bug 351</a>
   for further details on the progress of this work</p>
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -312,8 +308,8 @@
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index d118332..37ca046 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -7,9 +7,9 @@
 </head>
 <body>
 
-<div class="doc_title">TableGen Fundamentals</div>
+<h1>TableGen Fundamentals</h1>
 
-<div class="doc_text">
+<div>
 <ul>
   <li><a href="#introduction">Introduction</a>
   <ol>
@@ -50,10 +50,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="introduction">Introduction</a></div>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>TableGen's purpose is to help a human develop and maintain records of
 domain-specific information.  Because there may be a large number of these
@@ -72,12 +72,10 @@ find an emacs "TableGen mode" and a vim language file in the
 <tt>llvm/utils/emacs</tt> and <tt>llvm/utils/vim</tt> directories of your LLVM
 distribution, respectively.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="concepts">Basic concepts</a></div>
+<h3><a name="concepts">Basic concepts</a></h3>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen files consist of two key parts: 'classes' and 'definitions', both
 of which are considered 'records'.</p>
@@ -112,9 +110,9 @@ multiclass, as if they were declared in the current multiclass.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="example">An example record</a></div>
+<h3><a name="example">An example record</a></h3>
 
-<div class="doc_text">
+<div>
 
 <p>With no other arguments, TableGen parses the specified file and prints out
 all of the classes, then all of the definitions.  This is a good way to see what
@@ -212,9 +210,9 @@ abstractions they prefer to use when describing their information.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="running">Running TableGen</a></div>
+<h3><a name="running">Running TableGen</a></h3>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen runs just like any other LLVM tool.  The first (optional) argument
 specifies the file to read.  If a filename is not specified, <tt>tblgen</tt>
@@ -256,27 +254,28 @@ what you need and formats it in the appropriate way.</p>
 
 </div>
 
+</div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="syntax">TableGen syntax</a></div>
+<h2><a name="syntax">TableGen syntax</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>TableGen doesn't care about the meaning of data (that is up to the backend to
 define), but it does care about syntax, and it enforces a simple type system.
 This section describes the syntax and the constructs allowed in a TableGen file.
 </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="primitives">TableGen primitives</a></div>
+<h3><a name="primitives">TableGen primitives</a></h3>
+
+<div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection"><a name="comments">TableGen comments</a></div>
+<h4><a name="comments">TableGen comments</a></h4>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen supports BCPL style "<tt>//</tt>" comments, which run to the end of
 the line, and it also supports <b>nestable</b> "<tt>/* */</tt>" comments.</p>
@@ -284,11 +283,11 @@ the line, and it also supports <b>nestable</b> "<tt>/* */</tt>" comments.</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="types">The TableGen type system</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen files are strongly typed, in a simple (but complete) type-system.
 These types are used to perform automatic conversions, check for errors, and to
@@ -344,11 +343,11 @@ needed.</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="values">TableGen values and expressions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen allows for a pretty reasonable number of different expression forms
 when building up values.  These forms allow the TableGen file to be written in a
@@ -433,12 +432,14 @@ to a "<tt>bits&lt;4&gt;</tt>" value, for example.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="classesdefs">Classes and definitions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>As mentioned in the <a href="#concepts">intro</a>, classes and definitions
 (collectively known as 'records') in TableGen are the main high-level unit of
@@ -473,14 +474,12 @@ between a group of records and isolating it in a single place.  Also, classes
 permit the specification of default values for their subclasses, allowing the
 subclasses to override them as they wish.</p>
 
-</div>
-
 <!---------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="valuedef">Value definitions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Value definitions define named entries in records.  A value must be defined
 before it can be referred to as the operand for another value definition or
@@ -492,11 +491,11 @@ equal sign.  Value definitions require terminating semicolons.</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="recordlet">'let' expressions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>A record-level let expression is used to change the value of a value
 definition in a record.  This is primarily useful when a superclass defines a
@@ -519,11 +518,11 @@ because the <tt>D</tt> class overrode its value.</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="templateargs">Class template arguments</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>TableGen permits the definition of parameterized classes as well as normal
 concrete classes.  Parameterized TableGen classes specify a list of variable
@@ -610,11 +609,11 @@ X86 backend.</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="multiclass">Multiclass definitions and instances</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 While classes with template arguments are a good way to factor commonality
@@ -772,17 +771,21 @@ before them.
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="filescope">File scope entities</a>
-</div>
+</h3>
+
+<div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="include">File inclusion</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>TableGen supports the '<tt>include</tt>' token, which textually substitutes
 the specified file in place of the include directive.  The filename should be
 specified as a double quoted string immediately after the '<tt>include</tt>'
@@ -797,11 +800,11 @@ keyword.  Example:</p>
 </div>
 
 <!-- -------------------------------------------------------------------------->
-<div class="doc_subsubsection">
+<h4>
   <a name="globallet">'let' expressions</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>"Let" expressions at file scope are similar to <a href="#recordlet">"let"
 expressions within a record</a>, except they can specify a value binding for
@@ -864,11 +867,15 @@ several levels of multiclass instanciations. This also avoids the need of using
 </pre>
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="codegen">Code Generator backend info</a></div>
+<h2><a name="codegen">Code Generator backend info</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Expressions used by code generator to describe instructions and isel
 patterns:</p>
@@ -882,10 +889,10 @@ patterns:</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="backends">TableGen backends</a></div>
+<h2><a name="backends">TableGen backends</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>TODO: How they work, how to write one.  This section should not contain
 details about any particular backend, except maybe -print-enums as an example.
@@ -903,8 +910,8 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-01-07 18:05:37 +0100 (Fri, 07 Jan 2011) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 964bdc3..4fc4c70 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -7,9 +7,9 @@
 </head>
 <body>
       
-<div class="doc_title">
+<h1>
   LLVM Testing Infrastructure Guide
-</div>
+</h1>
 
 <ol>
   <li><a href="#overview">Overview</a></li>
@@ -52,10 +52,10 @@
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="overview">Overview</a></div>
+<h2><a name="overview">Overview</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
 <p>This document is the reference manual for the LLVM testing infrastructure. It
 documents the structure of the LLVM testing infrastructure, the tools needed to
@@ -64,10 +64,10 @@ use it, and how to add and run tests.</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="requirements">Requirements</a></div>
+<h2><a name="requirements">Requirements</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
 <p>In order to use the LLVM testing infrastructure, you will need all of the
 software required to build LLVM, as well
@@ -76,10 +76,10 @@ as <a href="http://python.org">Python</a> 2.4 or later.</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="org">LLVM testing infrastructure organization</a></div>
+<h2><a name="org">LLVM testing infrastructure organization</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM testing infrastructure contains two major categories of tests:
 regression tests and whole programs. The regression tests are contained inside
@@ -89,13 +89,11 @@ referred to as the "LLVM test suite" and are in the <tt>test-suite</tt> module
 in subversion.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="regressiontests">Regression tests</a></div>
+<h3><a name="regressiontests">Regression tests</a></h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 
 <p>The regression tests are small pieces of code that test a specific feature of
 LLVM or trigger a specific bug in LLVM.  They are usually written in LLVM
@@ -119,10 +117,10 @@ application or benchmark.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="testsuite">Test suite</a></div>
+<h3><a name="testsuite">Test suite</a></h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 
 <p>The test suite contains whole programs, which are pieces of
 code which can be compiled and linked into a stand-alone program that can be
@@ -144,11 +142,10 @@ generates code.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="debuginfotests">Debugging Information 
-tests</a></div>
+<h3><a name="debuginfotests">Debugging Information tests</a></h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 
 <p>The test suite contains tests to check quality of debugging information.
 The test are written in C based languages or in LLVM assembly language. </p>
@@ -160,11 +157,13 @@ test suite for more information . This test suite is located in the
 
 </div>
 
+</div>
+
 <!--=========================================================================-->
-<div class="doc_section"><a name="quick">Quick start</a></div>
+<h2><a name="quick">Quick start</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
   <p>The tests are located in two separate Subversion modules. The regressions
   tests are in the main "llvm" module under the directory
@@ -179,7 +178,8 @@ the <tt>test-suite</tt> directory will be automatically configured.
 Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="quickregressiontests">Regression tests</a></div>
+<h3><a name="quickregressiontests">Regression tests</a></h3>
+<div>
 <!-- _______________________________________________________________________ -->
 <p>To run all of the LLVM regression tests, use master Makefile in
  the <tt>llvm/test</tt> directory:</p>
@@ -198,7 +198,7 @@ Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
 </pre>
 </div>
 
-<p>If you have <a href="http://clang.llvm.org">Clang</a> checked out and built,
+<p>If you have <a href="http://clang.llvm.org/">Clang</a> checked out and built,
 you can run the LLVM and Clang tests simultaneously using:</p>
 
 <p>or</p>
@@ -239,10 +239,14 @@ script which is built as part of LLVM. For example, to run the
 <p>For more information on using the 'lit' tool, see 'llvm-lit --help' or the
 'lit' man page.</p>
 
+</div>
+
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="quicktestsuite">Test suite</a></div>
+<h3><a name="quicktestsuite">Test suite</a></h3>
 <!-- _______________________________________________________________________ -->
 
+<div>
+
 <p>To run the comprehensive test suite (tests that compile and execute whole 
 programs), first checkout and setup the <tt>test-suite</tt> module:</p>
 
@@ -292,9 +296,10 @@ that subdirectory.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="quickdebuginfotests">Debugging Information 
-tests</a></div>
+<h3><a name="quickdebuginfotests">Debugging Information tests</a></h3>
+<div>
 <!-- _______________________________________________________________________ -->
+<div>
 
 <p> To run debugging information tests simply checkout the tests inside
 clang/test directory. </p>
@@ -310,10 +315,14 @@ clang/test directory. </p>
 
 </div>
 
+</div>
+
+</div>
+
 <!--=========================================================================-->
-<div class="doc_section"><a name="rtstructure">Regression test structure</a></div>
+<h2><a name="rtstructure">Regression test structure</a></h2>
 <!--=========================================================================-->
-<div class="doc_text">
+<div>
   <p>The LLVM regression tests are driven by 'lit' and are located in
   the <tt>llvm/test</tt> directory.
 
@@ -335,12 +344,10 @@ clang/test directory. </p>
     <li><tt>Verifier</tt>: tests the IR verifier.</li>
   </ul>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="rtcustom">Writing new regression tests</a></div>
+<h3><a name="rtcustom">Writing new regression tests</a></h3>
 <!-- _______________________________________________________________________ -->
-<div class="doc_text">
+<div>
   <p>The regression test structure is very simple, but does require some
   information to be set. This information is gathered via <tt>configure</tt> and
   is written to a file, <tt>lit.site.cfg</tt>
@@ -492,10 +499,10 @@ negatives).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="FileCheck">The FileCheck utility</a></div>
+<h3><a name="FileCheck">The FileCheck utility</a></h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 
 <p>A powerful feature of the RUN: lines is that it allows any arbitrary commands
    to be executed as part of the test harness.  While standard (portable) unix
@@ -561,13 +568,12 @@ is a "subl" in between those labels.  If it existed somewhere else in the file,
 that would not count: "grep subl" matches if subl exists anywhere in the
 file.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a 
-name="FileCheck-check-prefix">The FileCheck -check-prefix option</a></div>
+<h4>
+  <a name="FileCheck-check-prefix">The FileCheck -check-prefix option</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The FileCheck -check-prefix option allows multiple test configurations to be
 driven from one .ll file.  This is useful in many circumstances, for example,
@@ -598,10 +604,11 @@ both 32-bit and 64-bit code generation.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a 
-name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a></div>
+<h4>
+  <a name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Sometimes you want to match lines and would like to verify that matches
 happen on exactly consecutive lines with no other lines in between them.  In
@@ -638,10 +645,11 @@ directive in a file.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a 
-name="FileCheck-CHECK-NOT">The "CHECK-NOT:" directive</a></div>
+<h4>
+  <a name="FileCheck-CHECK-NOT">The "CHECK-NOT:" directive</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The CHECK-NOT: directive is used to verify that a string doesn't occur
 between two matches (or the first match and the beginning of the file).  For
@@ -668,10 +676,11 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a 
-name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>
+<h4>
+  <a name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The CHECK: and CHECK-NOT: directives both take a pattern to match.  For most
 uses of FileCheck, fixed string matching is perfectly sufficient.  For some
@@ -700,10 +709,11 @@ braces explicitly from the input, you can use something ugly like
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a 
-name="FileCheck-Variables">FileCheck Variables</a></div>
+<h4>
+  <a name="FileCheck-Variables">FileCheck Variables</a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>It is often useful to match a pattern and then verify that it occurs again
 later in the file.  For codegen tests, this can be useful to allow any register,
@@ -738,11 +748,12 @@ define two separate CHECK lines that match on the same line.
 
 </div>
 
+</div>
+
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="rtvars">Variables and
-substitutions</a></div>
+<h3><a name="rtvars">Variables and substitutions</a></h3>
 <!-- _______________________________________________________________________ -->
-<div class="doc_text">
+<div>
   <p>With a RUN line there are a number of substitutions that are permitted. In
   general, any Tcl variable that is available in the <tt>substitute</tt> 
   function (in <tt>test/lib/llvm.exp</tt>) can be substituted into a RUN line.
@@ -835,9 +846,9 @@ substitutions</a></div>
 </div>
   
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="rtfeatures">Other Features</a></div>
+<h3><a name="rtfeatures">Other Features</a></h3>
 <!-- _______________________________________________________________________ -->
-<div class="doc_text">
+<div>
   <p>To make RUN line writing easier, there are several shell scripts located
   in the <tt>llvm/test/Scripts</tt> directory. This directory is in the PATH
   when running tests, so you can just call these scripts using their name. For
@@ -892,12 +903,13 @@ substitutions</a></div>
 
 </div>
 
+</div>
+
 <!--=========================================================================-->
-<div class="doc_section"><a name="testsuitestructure">Test suite
-Structure</a></div>
+<h2><a name="testsuitestructure">Test suite Structure</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>test-suite</tt> module contains a number of programs that can be compiled 
 with LLVM and executed. These programs are compiled using the native compiler
@@ -962,10 +974,10 @@ will help you separate benign warnings from actual test failures.</p>
 </div>
 
 <!--=========================================================================-->
-<div class="doc_section"><a name="testsuiterun">Running the test suite</a></div>
+<h2><a name="testsuiterun">Running the test suite</a></h2>
 <!--=========================================================================-->
 
-<div class="doc_text">
+<div>
 
 <p>First, all tests are executed within the LLVM object directory tree.  They
 <i>are not</i> executed inside of the LLVM source tree. This is because the
@@ -1020,14 +1032,13 @@ test suite creates temporary files during execution.</p>
 have the suite checked out and configured, you don't need to do it again (unless
 the test code or configure script changes).</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
-<a name="testsuiteexternal">Configuring External Tests</a></div>
+<h3>
+  <a name="testsuiteexternal">Configuring External Tests</a>
+</h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 <p>In order to run the External tests in the <tt>test-suite</tt>
   module, you must specify <i>--with-externals</i>.  This
   must be done during the <em>re-configuration</em> step (see above),
@@ -1055,10 +1066,11 @@ the test code or configure script changes).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
-<a name="testsuitetests">Running different tests</a></div>
+<h3>
+  <a name="testsuitetests">Running different tests</a>
+</h3>
 <!-- _______________________________________________________________________ -->
-<div class="doc_text">
+<div>
 <p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
 module also provides a mechanism for compiling the programs in different ways.
 If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
@@ -1078,10 +1090,11 @@ LLVM.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
-<a name="testsuiteoutput">Generating test output</a></div>
+<h3>
+  <a name="testsuiteoutput">Generating test output</a>
+</h3>
 <!-- _______________________________________________________________________ -->
-<div class="doc_text">
+<div>
   <p>There are a number of ways to run the tests and generate output. The most
   simple one is simply running <tt>gmake</tt> with no arguments. This will
   compile and run all programs in the tree using a number of different methods
@@ -1109,11 +1122,12 @@ LLVM.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
-<a name="testsuitecustom">Writing custom tests for the test suite</a></div>
+<h3>
+  <a name="testsuitecustom">Writing custom tests for the test suite</a>
+</h3>
 <!-- _______________________________________________________________________ -->
 
-<div class="doc_text">
+<div>
 
 <p>Assuming you can run the test suite, (e.g. "<tt>gmake TEST=nightly report</tt>"
 should work), it is really easy to run optimizations or code generator
@@ -1179,6 +1193,8 @@ example reports that can do fancy stuff.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -1189,8 +1205,8 @@ example reports that can do fancy stuff.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
index ea28dbe..2c1c69a 100644
--- a/docs/UsingLibraries.html
+++ b/docs/UsingLibraries.html
@@ -5,7 +5,7 @@
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
 <body>
-<div class="doc_title">Using The LLVM Libraries</div>
+<h1>Using The LLVM Libraries</h1>
 <ol>
   <li><a href="#abstract">Abstract</a></li>
   <li><a href="#introduction">Introduction</a></li>
@@ -26,12 +26,12 @@
 <p class="doc_warning">Warning: This document is out of date, for more
   information please
   see <a href="CommandGuide/html/llvm-config.html">llvm-config</a> or,
-  if you use CMake, <a href=CMake.html#embedding>the CMake LLVM
+  if you use CMake, <a href="CMake.html#embedding">the CMake LLVM
   guide</a>.</p>
 
 <!-- ======================================================================= -->
-<div class="doc_section"><a name="abstract">Abstract</a></div>
-<div class="doc_text">
+<h2><a name="abstract">Abstract</a></h2>
+<div>
   <p>Amongst other things, LLVM is a toolkit for building compilers, linkers,
   runtime executives, virtual machines, and other program execution related
   tools. In addition to the LLVM tool set, the functionality of LLVM is
@@ -45,8 +45,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_section"> <a name="introduction">Introduction</a></div>
-<div class="doc_text">
+<h2><a name="introduction">Introduction</a></h2>
+<div>
   <p>If you're writing a compiler, virtual machine, or any other utility based 
   on LLVM, you'll need to figure out which of the many libraries files you will 
   need to link with to be successful. An understanding of the contents of these 
@@ -74,8 +74,8 @@
   correct for your tool can sometimes be challenging.
 </div>
 <!-- ======================================================================= -->
-<div class="doc_section"><a name="descriptions"></a>Library Descriptions</div>
-<div class="doc_text">
+<h2><a name="descriptions">Library Descriptions</a></h2>
+<div>
   <p>The table below categorizes each library
 <table style="text-align:left">
   <tr><th>Library</th><th>Forms</th><th>Description</th></tr>
@@ -152,8 +152,8 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_section"><a name="dependencies"></a>Using llvm-config</div>
-<div class="doc_text">
+<h2><a name="dependencies">Using llvm-config</a></h2>
+<div>
   <p>The <tt>llvm-config</tt> tool is a perl script that produces on its output
   various kinds of information. For example, the source or object directories 
   used to build LLVM can be accessed by passing options to <tt>llvm-config</tt>.
@@ -187,16 +187,16 @@
   <!-- === This should be updated whenever new libraries are added,       ===-->
   <!-- === removed, or changed                                            ===-->
   <!-- =======NOTE: =========================================================-->
-  <h2>Dependency Relationships Of Libraries</h2>
+  <h3>Dependency Relationships Of Libraries</h3>
   <p>This graph shows the dependency of archive libraries on other archive 
   libraries or objects. Where a library has both archive and object forms, only
   the archive form is shown.</p>
-  <img src="img/libdeps.gif" alt="Library Dependencies"/>
-  <h2>Dependency Relationships Of Object Files</h2>
+  <img src="img/libdeps.gif" alt="Library Dependencies">
+  <h3>Dependency Relationships Of Object Files</h3>
   <p>This graph shows the dependency of object files on archive libraries or 
   other objects. Where a library has both object and archive forms, only the 
   dependency to the archive form is shown.</p> 
-  <img src="img/objdeps.gif" alt="Object File Dependencies"/>
+  <img src="img/objdeps.gif" alt="Object File Dependencies">
   <p>The following list shows the dependency relationships between libraries in
   textual form. The information is the same as shown on the graphs but arranged
   alphabetically.</p>
@@ -280,8 +280,8 @@
     <li>libLLVMSystem.a</li>
     <li>libLLVMbzip2.a</li>
   </ul></dd>
-  <dt><b>libLLVMSystem.a</b></dt><dd><ul>
-  </ul></dd>
+  <dt><b>libLLVMSystem.a</b></dt><dd>
+  </dd>
   <dt><b>libLLVMTarget.a</b></dt><dd><ul>
     <li>libLLVMCore.a</li>
     <li>libLLVMSupport.a</li>
@@ -295,8 +295,8 @@
     <li>libLLVMTarget.a</li>
     <li>libLLVMipa.a</li>
   </ul></dd>
-  <dt><b>libLLVMbzip2.a</b></dt><dd><ul>
-  </ul></dd>
+  <dt><b>libLLVMbzip2.a</b></dt><dd>
+  </dd>
   <dt><b>libLLVMipa.a</b></dt><dd><ul>
     <li>libLLVMAnalysis.a</li>
     <li>libLLVMCore.a</li>
@@ -401,42 +401,46 @@
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_section"><a name="rot">Linkage Rules Of Thumb</a></div>
-<div class="doc_text">
+<h2><a name="rot">Linkage Rules Of Thumb</a></h2>
+<div>
 	<p>This section contains various "rules of thumb" about what files you
 	should link into your programs.</p>
-</div>
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="always">Always Link LLVMCore, LLVMSupport,
-    and LLVMSystem</a></div>
-<div class="doc_text">
+<h3>
+  <a name="always">Always Link LLVMCore, LLVMSupport, and LLVMSystem</a>
+</h3>
+<div>
   <p>No matter what you do with LLVM, the last three entries in the value of 
   your LLVMLIBS make variable should always be: 
   <tt>LLVMCore LLVMSupport.a LLVMSystem.a</tt>. There are no <tt>LLVM</tt> 
   programs that don't depend on these three.</p>
 </div>
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="onlyone">Never link both archive and
-    re-linked library</a></div>
-<div class="doc_text">
+<h3>
+  <a name="onlyone">Never link both archive and re-linked library</a>
+</h3>
+<div>
   <p>There is never any point to linking both the re-linked (<tt>.o</tt>) and
   the archive (<tt>.a</tt>) versions of a library. Since the re-linked version
   includes the entire library, the archive version will not resolve any symbols.
   You could even end up with link error if you place the archive version before
   the re-linked version on the linker's command line.</p>
 </div>
+
+</div>
+
 <!-- ======================================================================= -->
 <hr>
 <div class="doc_footer">
 <address>
   <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-    src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"/></a>
+    src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
   <a href="http://validator.w3.org/check/referer"><img
     src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
 </address>
-<a href="http://llvm.org">The LLVM Compiler Infrastructure</a> 
-<br>Last modified: $Date: 2010-09-17 02:30:52 +0200 (Fri, 17 Sep 2010) $ </div>
+<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a> 
+<br>Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ </div>
 </body>
 </html>
 <!-- vim: sw=2 ts=2 ai
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index 193a1d4..c4892cb 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -9,9 +9,9 @@
 
 <body>
 
-<div class="doc_title">
+<h1>
   Writing an LLVM Compiler Backend
-</div>
+</h1>
 
 <ol>
   <li><a href="#intro">Introduction</a>
@@ -61,12 +61,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="intro">Introduction</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 This document describes techniques for writing compiler backends that convert
@@ -91,13 +91,11 @@ characteristics, such as a RISC instruction set and straightforward calling
 conventions.
 </p>
 
-</div>
-
-<div class="doc_subsection">
+<h3>
   <a name="Audience">Audience</a>
-</div>  
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The audience for this document is anyone who needs to write an LLVM backend to
@@ -106,21 +104,21 @@ generate code for a specific hardware or software target.
 
 </div>
 
-<div class="doc_subsection">
+<h3>
   <a name="Prerequisite">Prerequisite Reading</a>
-</div>  
+</h3>
 
-<div class="doc_text">  
+<div>  
 
 <p>
 These essential documents must be read before reading this document:
 </p>
 
 <ul>
-<li><i><a href="http://www.llvm.org/docs/LangRef.html">LLVM Language Reference
+<li><i><a href="LangRef.html">LLVM Language Reference
     Manual</a></i> &mdash; a reference manual for the LLVM assembly language.</li>
 
-<li><i><a href="http://www.llvm.org/docs/CodeGenerator.html">The LLVM
+<li><i><a href="CodeGenerator.html">The LLVM
     Target-Independent Code Generator</a></i> &mdash; a guide to the components
     (classes and code generation algorithms) for translating the LLVM internal
     representation into machine code for a specified target.  Pay particular
@@ -129,14 +127,14 @@ These essential documents must be read before reading this document:
     Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations,
     and Code Emission.</li>
 
-<li><i><a href="http://www.llvm.org/docs/TableGenFundamentals.html">TableGen
+<li><i><a href="TableGenFundamentals.html">TableGen
     Fundamentals</a></i> &mdash;a document that describes the TableGen
     (<tt>tblgen</tt>) application that manages domain-specific information to
     support LLVM code generation. TableGen processes input from a target
     description file (<tt>.td</tt> suffix) and generates C++ code that can be
     used for code generation.</li>
 
-<li><i><a href="http://www.llvm.org/docs/WritingAnLLVMPass.html">Writing an LLVM
+<li><i><a href="WritingAnLLVMPass.html">Writing an LLVM
     Pass</a></i> &mdash; The assembly printer is a <tt>FunctionPass</tt>, as are
     several SelectionDAG processing steps.</li>
 </ul>
@@ -155,11 +153,11 @@ machine dependent features.
 
 </div>
 
-<div class="doc_subsection">
+<h3>
   <a name="Basic">Basic Steps</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 To write a compiler backend for LLVM that converts the LLVM IR to code for a
@@ -220,17 +218,17 @@ that the class will need and which components will need to be subclassed.
 
 </div>
 
-<div class="doc_subsection">
+<h3>
   <a name="Preliminaries">Preliminaries</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 To actually create your compiler backend, you need to create and modify a few
 files. The absolute minimum is discussed here. But to actually use the LLVM
 target-independent code generator, you must perform the steps described in
-the <a href="http://www.llvm.org/docs/CodeGenerator.html">LLVM
+the <a href="CodeGenerator.html">LLVM
 Target-Independent Code Generator</a> document.
 </p>
 
@@ -281,13 +279,15 @@ regenerate configure by running <tt>./autoconf/AutoRegen.sh</tt>.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="TargetMachine">Target Machine</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 <tt>LLVMTargetMachine</tt> is designed as a base class for targets implemented
@@ -360,11 +360,6 @@ public:
 </pre>
 </div>
 
-</div>
-
-
-<div class="doc_text">
-
 <ul>
 <li><tt>getInstrInfo()</tt></li>
 <li><tt>getRegisterInfo()</tt></li>
@@ -398,10 +393,6 @@ SparcTargetMachine::SparcTargetMachine(const Module &amp;M, const std::string &a
 </pre>
 </div>
 
-</div>
-
-<div class="doc_text">
-
 <p>Hyphens separate portions of the <tt>TargetDescription</tt> string.</p>
 
 <ul>
@@ -424,12 +415,12 @@ SparcTargetMachine::SparcTargetMachine(const Module &amp;M, const std::string &a
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="TargetRegistration">Target Registration</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 You must also register your target with the <tt>TargetRegistry</tt>, which is
@@ -480,12 +471,12 @@ For more information, see
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="RegisterSet">Register Set and Register Classes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 You should describe a concrete target-specific class that represents the
@@ -514,14 +505,12 @@ input files and placed in <tt>XXXGenRegisterInfo.h.inc</tt> and
 implementation of <tt>XXXRegisterInfo</tt> requires hand-coding.
 </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="RegisterDef">Defining a Register</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The <tt>XXXRegisterInfo.td</tt> file typically starts with register definitions
@@ -700,11 +689,11 @@ fields of a register's TargetRegisterDesc.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="RegisterClassDef">Defining a Register Class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The <tt>RegisterClass</tt> class (specified in <tt>Target.td</tt>) is used to
@@ -894,12 +883,12 @@ namespace SP {   // Register class instances
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="implementRegister">Implement a subclass of</a> 
-  <a href="http://www.llvm.org/docs/CodeGenerator.html#targetregisterinfo">TargetRegisterInfo</a>
-</div>
+  <a href="CodeGenerator.html#targetregisterinfo">TargetRegisterInfo</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The final step is to hand code portions of <tt>XXXRegisterInfo</tt>, which
@@ -933,13 +922,15 @@ implementation in <tt>SparcRegisterInfo.cpp</tt>:
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="InstructionSet">Instruction Set</a>
-</div>
+</h2>
 
 <!-- *********************************************************************** -->
-<div class="doc_text">
+<div>
 
 <p>
 During the early stages of code generation, the LLVM IR code is converted to a
@@ -1103,7 +1094,7 @@ The fifth parameter is a string that is used by the assembly printer and can be
 left as an empty string until the assembly printer interface is implemented. The
 sixth and final parameter is the pattern used to match the instruction during
 the SelectionDAG Select Phase described in
-(<a href="http://www.llvm.org/docs/CodeGenerator.html">The LLVM
+(<a href="CodeGenerator.html">The LLVM
 Target-Independent Code Generator</a>).  This parameter is detailed in the next
 section, <a href="#InstructionSelector">Instruction Selector</a>.
 </p>
@@ -1188,14 +1179,12 @@ correspond to the values in <tt>SparcInstrInfo.td</tt>. I.e.,
 <tt>SPCC::ICC_NE = 9</tt>, <tt>SPCC::FCC_U = 23</tt> and so on.)
 </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="operandMapping">Instruction Operand Mapping</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The code generator backend maps instruction operands to fields in the
@@ -1283,12 +1272,12 @@ the <tt>rd</tt>, <tt>rs1</tt>, and <tt>rs2</tt> fields respectively.
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="implementInstr">Implement a subclass of </a>
-  <a href="http://www.llvm.org/docs/CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a>
-</div>
+  <a href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The final step is to hand code portions of <tt>XXXInstrInfo</tt>, which
@@ -1327,10 +1316,10 @@ implementation in <tt>SparcInstrInfo.cpp</tt>:
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="branchFolding">Branch Folding and If Conversion</a>
-</div>
-<div class="doc_text">
+</h3>
+<div>
 
 <p>
 Performance can be improved by combining instructions or by eliminating
@@ -1485,13 +1474,15 @@ branch.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="InstructionSelector">Instruction Selector</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 LLVM uses a <tt>SelectionDAG</tt> to represent LLVM IR instructions, and nodes
@@ -1533,7 +1524,7 @@ selection pass into the queue of passes to run.
 The LLVM static compiler (<tt>llc</tt>) is an excellent tool for visualizing the
 contents of DAGs. To display the <tt>SelectionDAG</tt> before or after specific
 processing phases, use the command line options for <tt>llc</tt>, described
-at <a href="http://llvm.org/docs/CodeGenerator.html#selectiondag_process">
+at <a href="CodeGenerator.html#selectiondag_process">
 SelectionDAG Instruction Selection Process</a>.
 </p>
 
@@ -1642,14 +1633,12 @@ SDNode *Select_ISD_STORE(const SDValue &amp;N) {
 </pre>
 </div>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="LegalizePhase">The SelectionDAG Legalize Phase</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 The Legalize phase converts a DAG to use types and operations that are natively
@@ -1716,14 +1705,12 @@ a <tt>LegalAction</tt> type enum value: <tt>Promote</tt>, <tt>Expand</tt>,
 contains examples of all four <tt>LegalAction</tt> values.
 </p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="promote">Promote</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 For an operation without native support for a given type, the specified type may
@@ -1742,11 +1729,11 @@ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="expand">Expand</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 For a type without native support, a value may need to be broken down further,
@@ -1767,11 +1754,11 @@ setOperationAction(ISD::FCOS, MVT::f32, Expand);
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="custom">Custom</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 For some operations, simple type promotion or operation expansion may be
@@ -1833,11 +1820,11 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &amp;DAG) {
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="legal">Legal</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>
 The <tt>Legal</tt> LegalizeAction enum value simply indicates that an
@@ -1865,12 +1852,14 @@ if (TM.getSubtarget&lt;SparcSubtarget&gt;().isV9())
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="callingConventions">Calling Conventions</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 To support target-specific calling conventions, <tt>XXXGenCallingConv.td</tt>
@@ -2015,13 +2004,15 @@ def RetCC_X86_32 : CallingConv&lt;[
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="assemblyPrinter">Assembly Printer</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 During the code emission stage, the code generator may utilize an LLVM pass to
@@ -2171,12 +2162,12 @@ output.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="subtargetSupport">Subtarget Support</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Subtarget support is used to inform the code generation process of instruction
@@ -2289,12 +2280,12 @@ XXXSubtarget::XXXSubtarget(const Module &amp;M, const std::string &amp;FS) {
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="jitSupport">JIT Support</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The implementation of a target machine optionally includes a Just-In-Time (JIT)
@@ -2333,14 +2324,12 @@ Both <tt>XXXJITInfo.cpp</tt> and <tt>XXXCodeEmitter.cpp</tt> must include the
 that write data (in bytes, words, strings, etc.) to the output stream.
 </p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="mce">Machine Code Emitter</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 In <tt>XXXCodeEmitter.cpp</tt>, a target-specific of the <tt>Emitter</tt> class
@@ -2478,11 +2467,11 @@ enum RelocationType {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="targetJITInfo">Target JIT Info</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>
 <tt>XXXJITInfo.cpp</tt> implements the JIT interfaces for target-specific
@@ -2537,6 +2526,8 @@ with assembler.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
@@ -2547,9 +2538,9 @@ with assembler.
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-11-23 04:31:01 +0100 (Tue, 23 Nov 2010) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index 80258e4..75426e0 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -8,9 +8,9 @@
 </head>
 <body>
 
-<div class="doc_title">
+<h1>
   Writing an LLVM Pass
-</div>
+</h1>
 
 <ol>
   <li><a href="#introduction">Introduction - What is a pass?</a></li>
@@ -121,12 +121,12 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="introduction">Introduction - What is a pass?</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The LLVM Pass Framework is an important part of the LLVM system, because LLVM
 passes are where most of the interesting parts of the compiler exist.  Passes
@@ -156,12 +156,12 @@ more advanced features are discussed.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="quickstart">Quick Start - Writing hello world</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Here we describe how to write the "hello world" of passes.  The "Hello" pass
 is designed to simply print out the name of non-external functions that exist in
@@ -169,14 +169,12 @@ the program being compiled.  It does not modify the program at all, it just
 inspects it.  The source code and files for this pass are available in the LLVM
 source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="makefile">Setting up the build environment</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
   <p>First, configure and build LLVM.  This needs to be done directly inside the
   LLVM source tree rather than in a separate objects directory.
@@ -185,7 +183,7 @@ source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
   <tt>lib/Transforms/Hello</tt>.  Finally, you must set up a build script 
   (Makefile) that will compile the source code for the new pass.  To do this, 
   copy the following into <tt>Makefile</tt>:</p>
-  <hr/>
+  <hr>
 
 <div class="doc_code"><pre>
 # Makefile for hello pass
@@ -211,17 +209,20 @@ the <tt>opt</tt> or <tt>bugpoint</tt> tools via their <tt>-load</tt> options.
 If your operating system uses a suffix other than .so (such as windows or 
 Mac OS/X), the appropriate extension will be used.</p>
 
+<p>If you are used CMake to build LLVM, see
+<a href="CMake.html#passdev">Developing an LLVM pass with CMake</a>.</p>
+
 <p>Now that we have the build scripts set up, we just need to write the code for
 the pass itself.</p>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="basiccode">Basic code required</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have a way to compile our new pass, we just have to write it.
 Start out with:</p>
@@ -301,7 +302,7 @@ function.</p>
 initialization value is not important.</p>
 
 <div class="doc_code"><pre>
-  static RegisterPass<Hello> X("<i>hello</i>", "<i>Hello World Pass</i>",
+  static RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>",
                         false /* Only looks at CFG */,
                         false /* Analysis Pass */);
 }  <i>// end of anonymous namespace</i>
@@ -337,7 +338,7 @@ is supplied as fourth argument. </p>
   };
   
   char Hello::ID = 0;
-  static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
+  static RegisterPass&lt;Hello&gt; X("hello", "Hello World Pass", false, false);
 }
 
 </pre></div>
@@ -353,11 +354,11 @@ them) to be useful.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="running">Running a pass with <tt>opt</tt></a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>Now that you have a brand new shiny shared object file, we can use the
 <tt>opt</tt> command to run an LLVM program through your pass.  Because you
@@ -443,13 +444,15 @@ about some more details of how they work and how to use them.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="passtype">Pass classes and requirements</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>One of the first things that you should do when designing a new pass is to
 decide what class you should subclass for your pass.  The <a
@@ -464,14 +467,12 @@ listed.  This gives the LLVM Pass Infrastructure information necessary to
 optimize how passes are run, so that the resultant compiler isn't unnecessarily
 slow.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ImmutablePass">The <tt>ImmutablePass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The most plain and boring type of pass is the "<tt><a
 href="http://llvm.org/doxygen/classllvm_1_1ImmutablePass.html">ImmutablePass</a></tt>"
@@ -490,11 +491,11 @@ invalidated, and are never "run".</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="ModulePass">The <tt>ModulePass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The "<tt><a
 href="http://llvm.org/doxygen/classllvm_1_1ModulePass.html">ModulePass</a></tt>"
@@ -516,14 +517,12 @@ DominatorTree for function definitions, not declarations.</p>
 <tt>ModulePass</tt> and overload the <tt>runOnModule</tt> method with the
 following signature:</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnModule">The <tt>runOnModule</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnModule(Module &amp;M) = 0;
@@ -535,12 +534,14 @@ false otherwise.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="CallGraphSCCPass">The <tt>CallGraphSCCPass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The "<tt><a
 href="http://llvm.org/doxygen/classllvm_1_1CallGraphSCCPass.html">CallGraphSCCPass</a></tt>"
@@ -581,15 +582,14 @@ because it has to handle SCCs with more than one node in it.  All of the virtual
 methods described below should return true if they modified the program, or
 false if they didn't.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doInitialization_scc">The <tt>doInitialization(CallGraph &amp;)</tt>
-  method</a>
-</div>
+<h4>
+  <a name="doInitialization_scc">
+    The <tt>doInitialization(CallGraph &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doInitialization(CallGraph &amp;CG);
@@ -606,11 +606,11 @@ fast).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnSCC">The <tt>runOnSCC</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnSCC(CallGraphSCC &amp;SCC) = 0;
@@ -623,12 +623,13 @@ otherwise.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doFinalization_scc">The <tt>doFinalization(CallGraph
-   &amp;)</tt> method</a>
-</div>
+<h4>
+  <a name="doFinalization_scc">
+    The <tt>doFinalization(CallGraph &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doFinalization(CallGraph &amp;CG);
@@ -641,12 +642,14 @@ program being compiled.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="FunctionPass">The <tt>FunctionPass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>In contrast to <tt>ModulePass</tt> subclasses, <tt><a
 href="http://llvm.org/doxygen/classllvm_1_1Pass.html">FunctionPass</a></tt>
@@ -671,15 +674,14 @@ href="#basiccode">Hello World</a> pass for example).  <tt>FunctionPass</tt>'s
 may overload three virtual methods to do their work.  All of these methods
 should return true if they modified the program, or false if they didn't.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doInitialization_mod">The <tt>doInitialization(Module &amp;)</tt>
-  method</a>
-</div>
+<h4>
+  <a name="doInitialization_mod">
+    The <tt>doInitialization(Module &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doInitialization(Module &amp;M);
@@ -703,11 +705,11 @@ free functions that it needs, adding prototypes to the module if necessary.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnFunction">The <tt>runOnFunction</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnFunction(Function &amp;F) = 0;
@@ -720,12 +722,13 @@ be returned if the function is modified.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doFinalization_mod">The <tt>doFinalization(Module
-  &amp;)</tt> method</a>
-</div>
+<h4>
+  <a name="doFinalization_mod">
+    The <tt>doFinalization(Module &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doFinalization(Module &amp;M);
@@ -738,12 +741,14 @@ program being compiled.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="LoopPass">The <tt>LoopPass</tt> class </a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p> All <tt>LoopPass</tt> execute on each loop in the function independent of
 all of the other loops in the function. <tt>LoopPass</tt> processes loops in
@@ -751,19 +756,18 @@ loop nest order such that outer most loop is processed last. </p>
 
 <p> <tt>LoopPass</tt> subclasses are allowed to update loop nest using
 <tt>LPPassManager</tt> interface. Implementing a loop pass is usually
-straightforward. <tt>Looppass</tt>'s may overload three virtual methods to
+straightforward. <tt>LoopPass</tt>'s may overload three virtual methods to
 do their work. All these methods should return true if they modified the 
 program, or false if they didn't. </p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doInitialization_loop">The <tt>doInitialization(Loop *,
-                                                 LPPassManager &amp;)</tt>
-  method</a>
-</div>
+<h4>
+  <a name="doInitialization_loop">
+    The <tt>doInitialization(Loop *,LPPassManager &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doInitialization(Loop *, LPPassManager &amp;LPM);
@@ -780,11 +784,11 @@ information.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnLoop">The <tt>runOnLoop</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnLoop(Loop *, LPPassManager &amp;LPM) = 0;
@@ -798,11 +802,11 @@ should be used to update loop nest.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="doFinalization_loop">The <tt>doFinalization()</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doFinalization();
@@ -815,12 +819,14 @@ program being compiled. </p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="RegionPass">The <tt>RegionPass</tt> class </a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p> <tt>RegionPass</tt> is similar to <a href="#LoopPass"><tt>LoopPass</tt></a>,
 but executes on each single entry single exit region in the function.
@@ -829,19 +835,18 @@ region is processed last.  </p>
 
 <p> <tt>RegionPass</tt> subclasses are allowed to update the region tree by using
 the <tt>RGPassManager</tt> interface. You may overload three virtual methods of
-<tt>RegionPass</tt> to implementing your own region pass is usually. All these
+<tt>RegionPass</tt> to implement your own region pass. All these
 methods should return true if they modified the program, or false if they didn not.
 </p>
-</div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doInitialization_region">The <tt>doInitialization(Region *,
-                                                 RGPassManager &amp;)</tt>
-  method</a>
-</div>
+<h4>
+  <a name="doInitialization_region">
+    The <tt>doInitialization(Region *, RGPassManager &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doInitialization(Region *, RGPassManager &amp;RGM);
@@ -858,11 +863,11 @@ information.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnRegion">The <tt>runOnRegion</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnRegion(Region *, RGPassManager &amp;RGM) = 0;
@@ -876,11 +881,11 @@ should be used to update region tree.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="doFinalization_region">The <tt>doFinalization()</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doFinalization();
@@ -893,14 +898,14 @@ program being compiled. </p>
 
 </div>
 
-
+</div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p><tt>BasicBlockPass</tt>'s are just like <a
 href="#FunctionPass"><tt>FunctionPass</tt></a>'s, except that they must limit
@@ -922,15 +927,14 @@ href="#doInitialization_mod"><tt>doInitialization(Module &amp;)</tt></a> and <a
 href="#doFinalization_mod"><tt>doFinalization(Module &amp;)</tt></a> methods that <a
 href="#FunctionPass"><tt>FunctionPass</tt></a>'s have, but also have the following virtual methods that may also be implemented:</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doInitialization_fn">The <tt>doInitialization(Function
-  &amp;)</tt> method</a>
-</div>
+<h4>
+  <a name="doInitialization_fn">
+    The <tt>doInitialization(Function &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doInitialization(Function &amp;F);
@@ -947,11 +951,11 @@ fast).</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="runOnBasicBlock">The <tt>runOnBasicBlock</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) = 0;
@@ -965,12 +969,13 @@ if the basic block is modified.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="doFinalization_fn">The <tt>doFinalization(Function &amp;)</tt> 
-  method</a>
-</div>
+<h4>
+  <a name="doFinalization_fn">
+    The <tt>doFinalization(Function &amp;)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> doFinalization(Function &amp;F);
@@ -984,12 +989,14 @@ finalization.</p>
 
 </div>
 
+</div>
+
 <!-- ======================================================================= -->
-<div class="doc_subsection">
+<h3>
   <a name="MachineFunctionPass">The <tt>MachineFunctionPass</tt> class</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>A <tt>MachineFunctionPass</tt> is a part of the LLVM code generator that
 executes on the machine-dependent representation of each LLVM function in the
@@ -1014,15 +1021,14 @@ href="#runOnMachineFunction"><tt>runOnMachineFunction</tt></a> (including global
 data)</li>
 </ol>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="runOnMachineFunction">The <tt>runOnMachineFunction(MachineFunction
-  &amp;MF)</tt> method</a>
-</div>
+<h4>
+  <a name="runOnMachineFunction">
+    The <tt>runOnMachineFunction(MachineFunction &amp;MF)</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual bool</b> runOnMachineFunction(MachineFunction &amp;MF) = 0;
@@ -1043,13 +1049,17 @@ remember, you may not modify the LLVM <tt>Function</tt> or its contents from a
 
 </div>
 
+</div>
+
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="registration">Pass registration</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>In the <a href="#basiccode">Hello World</a> example pass we illustrated how
 pass registration works, and discussed some of the reasons that it is used and
@@ -1066,14 +1076,12 @@ well as for debug output generated by the <tt>--debug-pass</tt> option.</p>
 <p>If you want your pass to be easily dumpable, you should 
 implement the virtual <tt>print</tt> method:</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="print">The <tt>print</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual void</b> print(std::ostream &amp;O, <b>const</b> Module *M) <b>const</b>;
@@ -1093,13 +1101,15 @@ depended on.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="interaction">Specifying interactions between passes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>One of the main responsibilities of the <tt>PassManager</tt> is to make sure
 that passes interact with each other correctly.  Because <tt>PassManager</tt>
@@ -1116,14 +1126,12 @@ specifies.  If a pass does not implement the <tt><a
 href="#getAnalysisUsage">getAnalysisUsage</a></tt> method, it defaults to not
 having any prerequisite passes, and invalidating <b>all</b> other passes.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="getAnalysisUsage">The <tt>getAnalysisUsage</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;Info) <b>const</b>;
@@ -1139,11 +1147,14 @@ object:</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="AU::addRequired">The <tt>AnalysisUsage::addRequired&lt;&gt;</tt> and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods</a>
-</div>
-
-<div class="doc_text">
+<h4>
+  <a name="AU::addRequired">
+    The <tt>AnalysisUsage::addRequired&lt;&gt;</tt>
+    and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods
+  </a>
+</h4>
+
+<div>
 <p>
 If your pass requires a previous pass to be executed (an analysis for example),
 it can use one of these methods to arrange for it to be run before your pass.
@@ -1165,11 +1176,13 @@ pass is.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="AU::addPreserved">The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method</a>
-</div>
+<h4>
+  <a name="AU::addPreserved">
+    The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 <p>
 One of the jobs of the PassManager is to optimize how and when analyses are run.
 In particular, it attempts to avoid recomputing data unless it needs to.  For
@@ -1200,22 +1213,13 @@ the fact that it hacks on the CFG.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="AU::examples">Example implementations of <tt>getAnalysisUsage</tt></a>
-</div>
-
-<div class="doc_text">
-
-<div class="doc_code"><pre>
-  <i>// This is an example implementation from an analysis, which does not modify
-  // the program at all, yet has a prerequisite.</i>
-  <b>void</b> <a href="http://llvm.org/doxygen/classllvm_1_1PostDominanceFrontier.html">PostDominanceFrontier</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
-    AU.setPreservesAll();
-    AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1PostDominatorTree.html">PostDominatorTree</a>&gt;();
-  }
-</pre></div>
+<h4>
+  <a name="AU::examples">
+    Example implementations of <tt>getAnalysisUsage</tt>
+  </a>
+</h4>
 
-<p>and:</p>
+<div>
 
 <div class="doc_code"><pre>
   <i>// This example modifies the program, but does not modify the CFG</i>
@@ -1228,12 +1232,14 @@ the fact that it hacks on the CFG.
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="getAnalysis">The <tt>getAnalysis&lt;&gt;</tt> and
-<tt>getAnalysisIfAvailable&lt;&gt;</tt> methods</a>
-</div>
+<h4>
+  <a name="getAnalysis">
+    The <tt>getAnalysis&lt;&gt;</tt> and
+    <tt>getAnalysisIfAvailable&lt;&gt;</tt> methods
+  </a>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>Pass::getAnalysis&lt;&gt;</tt> method is automatically inherited by
 your class, providing you with access to the passes that you declared that you
@@ -1285,13 +1291,15 @@ if it is active.  For example:</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="analysisgroup">Implementing Analysis Groups</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we understand the basics of how passes are defined, how they are
 used, and how they are required from other passes, it's time to get a little bit
@@ -1310,14 +1318,12 @@ between these two extremes for other implementations).  To cleanly support
 situations like this, the LLVM Pass Infrastructure supports the notion of
 Analysis Groups.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="agconcepts">Analysis Group Concepts</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>An Analysis Group is a single simple interface that may be implemented by
 multiple different passes.  Analysis Groups can be given human readable names
@@ -1364,11 +1370,11 @@ hypothetical example) instead.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="registerag">Using <tt>RegisterAnalysisGroup</tt></a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>The <tt>RegisterAnalysisGroup</tt> template is used to register the analysis
 group itself, while the <tt>INITIALIZE_AG_PASS</tt> is used to add pass
@@ -1425,13 +1431,15 @@ pass is the default implementation for the interface.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="passStatistics">Pass Statistics</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 <p>The <a
 href="http://llvm.org/doxygen/Statistic_8h-source.html"><tt>Statistic</tt></a>
 class is designed to be an easy way to expose various success
@@ -1443,12 +1451,12 @@ line. See the <a href="http://llvm.org/docs/ProgrammersManual.html#Statistic">St
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="passmanager">What PassManager does</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The <a
 href="http://llvm.org/doxygen/PassManager_8h-source.html"><tt>PassManager</tt></a>
@@ -1615,14 +1623,12 @@ Hello: main
 <p>Which shows that we don't accidentally invalidate dominator information
 anymore, and therefore do not have to compute it twice.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="releaseMemory">The <tt>releaseMemory</tt> method</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <div class="doc_code"><pre>
   <b>virtual void</b> releaseMemory();
@@ -1643,13 +1649,15 @@ class, before the next call of <tt>run*</tt> in your pass.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="registering">Registering dynamically loaded passes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><i>Size matters</i> when constructing production quality tools using llvm, 
 both for the purposes of distribution, and for regulating the resident code size
@@ -1676,14 +1684,12 @@ the static destructor <i>unregisters</i>. Thus a pass that is statically linked
 in the tool will be registered at start up. A dynamically loaded pass will
 register on load and unregister at unload.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
+<h3>
   <a name="registering_existing">Using existing registries</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>There are predefined registries to track instruction scheduling
 (<tt>RegisterScheduler</tt>) and register allocation (<tt>RegisterRegAlloc</tt>)
@@ -1744,11 +1750,11 @@ call line to <tt>llvm/Codegen/LinkAllCodegenComponents.h</tt>.</p>
 
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsection">
+<h3>
   <a name="registering_new">Creating new registries</a>
-</div>
+</h3>
 
-<div class="doc_text">
+<div>
 
 <p>The easiest way to get started is to clone one of the existing registries; we
 recommend <tt>llvm/CodeGen/RegAllocRegistry.h</tt>.  The key things to modify
@@ -1776,13 +1782,15 @@ creator.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="debughints">Using GDB with dynamically loaded passes</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Unfortunately, using GDB with dynamically loaded passes is not as easy as it
 should be.  First of all, you can't set a breakpoint in a shared object that has
@@ -1794,14 +1802,12 @@ GDB.</p>
 transformation invoked by <tt>opt</tt>, although nothing described here depends
 on that.</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="breakpoint">Setting a breakpoint in your pass</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>First thing you do is start <tt>gdb</tt> on the <tt>opt</tt> process:</p>
 
@@ -1842,11 +1848,11 @@ or do other standard debugging stuff.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="debugmisc">Miscellaneous Problems</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Once you have the basics down, there are a couple of problems that GDB has,
 some with solutions, some without.</p>
@@ -1874,26 +1880,26 @@ href="mailto:sabre@nondot.org">Chris</a>.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section">
+<h2>
   <a name="future">Future extensions planned</a>
-</div>
+</h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Although the LLVM Pass Infrastructure is very capable as it stands, and does
 some nifty stuff, there are things we'd like to add in the future.  Here is
 where we are going:</p>
 
-</div>
-
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
+<h4>
   <a name="SMP">Multithreaded LLVM</a>
-</div>
+</h4>
 
-<div class="doc_text">
+<div>
 
 <p>Multiple CPU machines are becoming more common and compilation can never be
 fast enough: obviously we should allow for a multithreaded compiler.  Because of
@@ -1911,6 +1917,8 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -1920,8 +1928,8 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 
 </body>
diff --git a/docs/doxygen.css b/docs/doxygen.css
index 83b049b..80c6cad 100644
--- a/docs/doxygen.css
+++ b/docs/doxygen.css
@@ -370,9 +370,39 @@ H2 {
 H3 {
  font-size: 100%;
 }
+
+H2, H3 {
+  border-bottom: 2px solid;
+  margin-top: 2em;
+}
+
 A.qindex {}
 A.qindexRef {}
 A.el { text-decoration: none; font-weight: bold }
 A.elRef { font-weight: bold }
 A.code { text-decoration: none; font-weight: normal; color: #4444ee }
 A.codeRef { font-weight: normal; color: #4444ee }
+
+div.memitem {
+  border: 1px solid #999999;
+  margin-top: 1.0em;
+  margin-bottom: 1.0em;
+  -webkit-border-radius: 0.5em;
+  -webkit-box-shadow: 3px 3px 6px #777777;
+  -moz-border-radius: 0.5em;
+  -moz-box-shadow: black 3px 3px 3px;
+}
+
+div.memproto {
+  background-color: #E3E4E5;
+  padding: 0.25em 0.5em;
+  -webkit-border-top-left-radius: 0.5em;
+  -webkit-border-top-right-radius: 0.5em;
+  -moz-border-radius-topleft: 0.5em;
+  -moz-border-radius-topright: 0.5em;
+}
+
+div.memdoc {
+  padding-left: 1em;
+  padding-right: 1em;
+}
diff --git a/docs/doxygen.footer b/docs/doxygen.footer
index d75fff5..15585b8 100644
--- a/docs/doxygen.footer
+++ b/docs/doxygen.footer
@@ -1,6 +1,6 @@
 <hr>
 <p class="footer">
-Generated on $datetime for <a href="http://llvm.org">$projectname</a> by
+Generated on $datetime for <a href="http://llvm.org/">$projectname</a> by
 <a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
 align="middle" border="0"/>$doxygenversion</a><br>
 Copyright &copy; 2003-2009 University of Illinois at Urbana-Champaign.
diff --git a/docs/index.html b/docs/index.html
index 74c39f6..a25148a 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -7,13 +7,12 @@
 </head>
 <body>
 
-<div class="doc_title">Documentation for the LLVM System at SVN head</div>
+<h1>Documentation for the LLVM System at SVN head</h1>
 
 <p class="doc_warning">If you are using a released version of LLVM,
 see <a href="http://llvm.org/releases/">the download page</a> to find
 your documentation.</p>
 
-<div class="doc_text">
 <table class="layout" width="95%"><tr class="layout"><td class="left">
 <ul>
   <li><a href="#llvmdesign">LLVM Design</a></li>
@@ -32,14 +31,13 @@ your documentation.</p>
     </p>
   </form>
 </td></tr></table>
-</div>
 
 <div class="doc_author">
-  <p>Written by <a href="http://llvm.org">The LLVM Team</a></p>
+  <p>Written by <a href="http://llvm.org/">The LLVM Team</a></p>
 </div>
 
 <!--=======================================================================-->
-<div class="doc_section"><a name="llvmdesign">LLVM Design &amp; Overview</a></div>
+<h2><a name="llvmdesign">LLVM Design &amp; Overview</a></h2>
 <!--=======================================================================-->
 
 <ul>
@@ -57,7 +55,7 @@ frequent questions about LLVM's most frequently misunderstood instruction.</li>
 </ul>
 
 <!--=======================================================================-->
-<div class="doc_section"><a name="userguide">LLVM User Guides</a></div>
+<h2><a name="userguide">LLVM User Guides</a></h2>
 <!--=======================================================================-->
 
 <ul>
@@ -75,7 +73,7 @@ LLVM for a custom language, and the facilities LLVM offers in tutorial form.</li
 <li><a href="DeveloperPolicy.html">Developer Policy</a> - The LLVM project's
 policy towards developers and their contributions.</li>
 
-<li><a href="/docs/CommandGuide/index.html">LLVM Command Guide</a> - A reference
+<li><a href="CommandGuide/index.html">LLVM Command Guide</a> - A reference
 manual for the LLVM command line utilities ("man" pages for LLVM tools).<br>
 Current tools:
  <a href="/cmds/llvm-ar.html">llvm-ar</a>,
@@ -131,7 +129,7 @@ href="irc://irc.oftc.net/llvm">join #llvm on irc.oftc.net</a> directly.</li>
 
 
 <!--=======================================================================-->
-<div class="doc_section"><a name="llvmprog">General LLVM Programming Documentation</a></div>
+<h2><a name="llvmprog">General LLVM Programming Documentation</a></h2>
 <!--=======================================================================-->
 
 <ul>
@@ -179,7 +177,7 @@ href="http://llvm.org/doxygen/inherits.html">classes</a>)
 </ul>
 
 <!--=======================================================================-->
-<div class="doc_section"><a name="subsystems">LLVM Subsystem Documentation</a></div>
+<h2><a name="subsystems">LLVM Subsystem Documentation</a></h2>
 <!--=======================================================================-->
 
 <ul>
@@ -246,7 +244,7 @@ JITed code with GDB.</li>
 
 
 <!--=======================================================================-->
-<div class="doc_section"><a name="maillist">LLVM Mailing Lists</a></div>
+<h2><a name="maillist">LLVM Mailing Lists</a></h2>
 <!--=======================================================================-->
 
 <ul>
@@ -286,8 +284,8 @@ times each day, making it a high volume list.</li>
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
-  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body></html>
 
diff --git a/docs/llvm.css b/docs/llvm.css
index f572b5e..1222cf1 100644
--- a/docs/llvm.css
+++ b/docs/llvm.css
@@ -23,7 +23,7 @@ th          { border: 2px solid gray; font-weight: bold; font-size: 105%;
  * Documentation
  */
 /* Common for title and header */
-.doc_title, .doc_section, .doc_subsection, h1, h2 {
+.doc_title, .doc_section, .doc_subsection, h1, h2, h3 {
   color: black; background: url("img/lines.gif");
   font-family: "Georgia,Palatino,Times,Roman,SanSerif"; font-weight: bold;
   border-width: 1px;
@@ -35,17 +35,17 @@ th          { border: 2px solid gray; font-weight: bold; font-size: 105%;
   padding-bottom: 2px
 }
 
-h1, .doc_section   { text-align: center; font-size: 22pt;
-                     margin: 20pt 0pt 5pt 0pt; }
+h1, .doc_title, .title { text-align: left;   font-size: 25pt }
 
-.doc_title, .title { text-align: left;   font-size: 25pt }
+h2, .doc_section   { text-align: center; font-size: 22pt;
+                     margin: 20pt 0pt 5pt 0pt; }
 
-h2, .doc_subsection { width: 75%;
+h3, .doc_subsection { width: 75%;
                       text-align: left;  font-size: 12pt;
                       padding: 4pt 4pt 4pt 4pt;
                       margin: 1.5em 0.5em 0.5em 0.5em }
 
-h3, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em;
+h4, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em;
                          font-weight: bold; font-style: oblique;
                          border-bottom: 1px solid #999999; font-size: 12pt;
                          width: 75%; }
@@ -70,6 +70,10 @@ h3, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em;
                   display: table;
                 }
 
+h2+div, h2+p {text-align: left; padding-left: 20pt; padding-right: 10pt;}
+h3+div, h3+p {text-align: left; padding-left: 20pt; padding-right: 10pt;}
+h4+div, h4+p {text-align: left; padding-left: 20pt; padding-right: 10pt;}
+
 /* It is preferrable to use <pre class="doc_code"> everywhere instead of the
  * <div class="doc_code"><pre>...</ptr></div> construct.
  *
diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html
index c256af4..22a2b12 100644
--- a/docs/tutorial/LangImpl1.html
+++ b/docs/tutorial/LangImpl1.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Tutorial Introduction and the Lexer</div>
+<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -30,10 +30,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Tutorial Introduction</a></div>
+<h2><a name="intro">Tutorial Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to the "Implementing a language with LLVM" tutorial.  This tutorial
 runs through the implementation of a simple language, showing how fun and
@@ -123,10 +123,10 @@ languages!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="language">The Basic Language</a></div>
+<h2><a name="language">The Basic Language</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This tutorial will be illustrated with a toy language that we'll call
 "<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived 
@@ -181,10 +181,10 @@ a Mandelbrot Set</a> at various levels of magnification.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="lexer">The Lexer</a></div>
+<h2><a name="lexer">The Lexer</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>When it comes to implementing a language, the first thing needed is
 the ability to process a text file and recognize what it says.  The traditional
@@ -341,8 +341,8 @@ so that you can use the lexer and parser together.
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
index f39ed6c..c6a9bb1 100644
--- a/docs/tutorial/LangImpl2.html
+++ b/docs/tutorial/LangImpl2.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Implementing a Parser and AST</div>
+<h1>Kaleidoscope: Implementing a Parser and AST</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -36,10 +36,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 2 Introduction</a></div>
+<h2><a name="intro">Chapter 2 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  This chapter shows you how to use the lexer, built in 
@@ -61,10 +61,10 @@ Tree.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="ast">The Abstract Syntax Tree (AST)</a></div>
+<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The AST for a program captures its behavior in such a way that it is easy for
 later stages of the compiler (e.g. code generation) to interpret.  We basically
@@ -178,10 +178,10 @@ bodies in Kaleidoscope.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserbasics">Parser Basics</a></div>
+<h2><a name="parserbasics">Parser Basics</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have an AST to build, we need to define the parser code to build
 it.  The idea here is that we want to parse something like "x+y" (which is
@@ -239,11 +239,10 @@ piece of our grammar: numeric literals.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserprimexprs">Basic Expression
- Parsing</a></div>
+<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>We start with numeric literals, because they are the simplest to process.
 For each production in our grammar, we'll define a function which parses that
@@ -394,11 +393,10 @@ They are a bit more complex.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserbinops">Binary Expression
- Parsing</a></div>
+<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Binary expressions are significantly harder to parse because they are often
 ambiguous.  For example, when given the string "x+y*z", the parser can choose
@@ -617,10 +615,10 @@ handle function definitions, etc.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parsertop">Parsing the Rest</a></div>
+<h2><a name="parsertop">Parsing the Rest</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The next thing missing is handling of function prototypes.  In Kaleidoscope,
@@ -714,10 +712,10 @@ actually <em>execute</em> this code we've built!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="driver">The Driver</a></div>
+<h2><a name="driver">The Driver</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The driver for this simply invokes all of the parsing pieces with a top-level
 dispatch loop.  There isn't much interesting here, so I'll just include the
@@ -753,10 +751,10 @@ type "4+5;", and the parser will know you are done.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="conclusions">Conclusions</a></div>
+<h2><a name="conclusions">Conclusions</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>With just under 400 lines of commented code (240 lines of non-comment, 
 non-blank code), we fully defined our minimal language, including a lexer,
@@ -790,10 +788,10 @@ Representation (IR) from the AST.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for this and the previous chapter.  
@@ -1226,8 +1224,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index a320ff7..47406ca 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Code generation to LLVM IR</div>
+<h1>Kaleidoscope: Code generation to LLVM IR</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -34,10 +34,10 @@ Support</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 3 Introduction</a></div>
+<h2><a name="intro">Chapter 3 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  This chapter shows you how to transform the <a 
@@ -57,10 +57,10 @@ releases page</a>.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="basics">Code Generation Setup</a></div>
+<h2><a name="basics">Code Generation Setup</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 In order to generate LLVM IR, we want some simple setup to get started.  First
@@ -147,10 +147,10 @@ has already been done, and we'll just use it to emit code.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="exprs">Expression Code Generation</a></div>
+<h2><a name="exprs">Expression Code Generation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Generating LLVM code for expression nodes is very straightforward: less
 than 45 lines of commented code for all four of our expression nodes.  First
@@ -293,10 +293,10 @@ basic framework.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="funcs">Function Code Generation</a></div>
+<h2><a name="funcs">Function Code Generation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Code generation for prototypes and functions must handle a number of
 details, which make their code less beautiful than expression code
@@ -515,11 +515,10 @@ def bar() foo(1, 2); # error, unknown function "foo"
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="driver">Driver Changes and 
-Closing Thoughts</a></div>
+<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 For now, code generation to LLVM doesn't really get us much, except that we can
@@ -657,10 +656,10 @@ support</a> to this so we can actually start running code!</p>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1262,8 +1261,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2011-02-15 01:24:32 +0100 (Tue, 15 Feb 2011) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index a2511d9..5b8990e4 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Adding JIT and Optimizer Support</div>
+<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -33,10 +33,10 @@ Flow</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 4 Introduction</a></div>
+<h2><a name="intro">Chapter 4 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  Chapters 1-3 described the implementation of a simple
@@ -48,11 +48,10 @@ for the Kaleidoscope language.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="trivialconstfold">Trivial Constant
-Folding</a></div>
+<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Our demonstration for Chapter 3 is elegant and easy to extend.  Unfortunately,
@@ -134,11 +133,10 @@ range of optimizations that you can use, in the form of "passes".</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="optimizerpasses">LLVM Optimization
- Passes</a></div>
+<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM provides many optimization passes, which do many different sorts of
 things and have different tradeoffs.  Unlike other systems, LLVM doesn't hold
@@ -266,10 +264,10 @@ executing it!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="jit">Adding a JIT Compiler</a></div>
+<h2><a name="jit">Adding a JIT Compiler</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Code that is available in LLVM IR can have a wide variety of tools 
 applied to it.  For example, you can run optimizations on it (as we did above),
@@ -474,10 +472,10 @@ tackling some interesting LLVM IR issues along the way.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1078,7 +1076,7 @@ int main() {
 
   // Create the JIT.  This takes ownership of the module.
   std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
   if (!TheExecutionEngine) {
     fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
     exit(1);
@@ -1130,8 +1128,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index d2c3bd0..4fc23a1 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: Control Flow</div>
+<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -48,10 +48,10 @@ User-defined Operators</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 5 Introduction</a></div>
+<h2><a name="intro">Chapter 5 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  Parts 1-4 described the implementation of the simple
@@ -65,14 +65,14 @@ have an if/then/else expression plus a simple 'for' loop.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="ifthen">If/Then/Else</a></div>
+<h2><a name="ifthen">If/Then/Else</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Extending Kaleidoscope to support if/then/else is quite straightforward.  It
-basically requires adding lexer support for this "new" concept to the lexer,
+basically requires adding support for this "new" concept to the lexer,
 parser, AST, and LLVM code emitter.  This example is nice, because it shows how
 easy it is to "grow" a language over time, incrementally extending it as new
 ideas are discovered.</p>
@@ -108,15 +108,12 @@ Since Kaleidoscope allows side-effects, this behavior is important to nail down.
 <p>Now that we know what we "want", lets break this down into its constituent
 pieces.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="iflexer">Lexer Extensions for
-If/Then/Else</a></div>
+<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
 
-<div class="doc_text">
+<div>
 
 <p>The lexer extensions are straightforward.  First we add new enum values
 for the relevant tokens:</p>
@@ -146,11 +143,10 @@ stuff:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifast">AST Extensions for
- If/Then/Else</a></div>
+<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>To represent the new expression we add a new AST node for it:</p>
 
@@ -172,11 +168,10 @@ public:
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifparser">Parser Extensions for
-If/Then/Else</a></div>
+<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have the relevant tokens coming from the lexer and we have the
 AST node to build, our parsing logic is relatively straightforward.  First we
@@ -231,10 +226,10 @@ static ExprAST *ParsePrimary() {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifir">LLVM IR for If/Then/Else</a></div>
+<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have it parsing and building the AST, the final piece is adding
 LLVM code generation support.  This is the most interesting part of the
@@ -347,11 +342,10 @@ directly.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifcodegen">Code Generation for 
-If/Then/Else</a></div>
+<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>In order to generate code for this, we implement the <tt>Codegen</tt> method
 for <tt>IfExprAST</tt>:</p>
@@ -472,7 +466,7 @@ are emitted, we can finish up with the merge code:</p>
   // Emit merge block.
   TheFunction->getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN->addIncoming(ThenV, ThenBB);
@@ -500,11 +494,13 @@ another useful expression that is familiar from non-functional languages...</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="for">'for' Loop Expression</a></div>
+<h2><a name="for">'for' Loop Expression</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we know how to add basic control flow constructs to the language,
 we have the tools to add more powerful things.  Lets add something more
@@ -533,14 +529,11 @@ variables, it will get more useful.</p>
 <p>As before, lets talk about the changes that we need to Kaleidoscope to
 support this.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forlexer">Lexer Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The lexer extensions are the same sort of thing as for if/then/else:</p>
 
@@ -566,11 +559,10 @@ the 'for' Loop</a></div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forast">AST Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The AST node is just as simple.  It basically boils down to capturing
 the variable name and the constituent expressions in the node.</p>
@@ -593,11 +585,10 @@ public:
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forparser">Parser Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The parser code is also fairly standard.  The only interesting thing here is
 handling of the optional step value.  The parser code handles it by checking to
@@ -653,11 +644,10 @@ static ExprAST *ParseForExpr() {
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forir">LLVM IR for 
-the 'for' Loop</a></div>
+<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now we get to the good part: the LLVM IR we want to generate for this thing.
 With the simple example above, we get this LLVM IR (note that this dump is
@@ -699,11 +689,10 @@ expressions, and some basic blocks.  Lets see how this fits together.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forcodegen">Code Generation for 
-the 'for' Loop</a></div>
+<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The first part of Codegen is very simple: we just output the start expression
 for the loop value:</p>
@@ -746,7 +735,7 @@ create an unconditional branch for the fall-through between the two blocks.</p>
   Builder.SetInsertPoint(LoopBB);
   
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
   Variable-&gt;addIncoming(StartVal, PreheaderBB);
 </pre>
 </div>
@@ -876,11 +865,13 @@ language.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1452,7 +1443,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN-&gt;addIncoming(ThenV, ThenBB);
@@ -1494,7 +1485,7 @@ Value *ForExprAST::Codegen() {
   Builder.SetInsertPoint(LoopBB);
   
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
   Variable-&gt;addIncoming(StartVal, PreheaderBB);
   
   // Within the loop, the variable is defined equal to the PHI node.  If it
@@ -1721,7 +1712,7 @@ int main() {
 
   // Create the JIT.  This takes ownership of the module.
   std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
   if (!TheExecutionEngine) {
     fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
     exit(1);
@@ -1773,8 +1764,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 7ddf3a0..31d7ff4 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -11,7 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: User-defined Operators</div>
+<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -34,10 +34,10 @@ Variables / SSA Construction</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 6 Introduction</a></div>
+<h2><a name="intro">Chapter 6 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  At this point in our tutorial, we now have a fully
@@ -60,10 +60,10 @@ an example of what you can build with Kaleidoscope and its feature set.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="idea">User-defined Operators: the Idea</a></div>
+<h2><a name="idea">User-defined Operators: the Idea</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The "operator overloading" that we will add to Kaleidoscope is more general than
@@ -125,10 +125,10 @@ operators.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="binary">User-defined Binary Operators</a></div>
+<h2><a name="binary">User-defined Binary Operators</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Adding support for user-defined binary operators is pretty simple with our
 current framework.  We'll first add support for the unary/binary keywords:</p>
@@ -342,10 +342,10 @@ see what it takes.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="unary">User-defined Unary Operators</a></div>
+<h2><a name="unary">User-defined Unary Operators</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Since we don't currently support unary operators in the Kaleidoscope
 language, we'll need to add everything to support them.  Above, we added simple
@@ -491,10 +491,10 @@ is simpler primarily because it doesn't need to handle any predefined operators.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="example">Kicking the Tires</a></div>
+<h2><a name="example">Kicking the Tires</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>It is somewhat hard to believe, but with a few simple extensions we've
 covered in the last chapters, we have grown a real-ish language.  With this, we 
@@ -796,10 +796,10 @@ add variable mutation without building SSA in your front-end.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1475,7 +1475,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN-&gt;addIncoming(ThenV, ThenBB);
@@ -1517,7 +1517,7 @@ Value *ForExprAST::Codegen() {
   Builder.SetInsertPoint(LoopBB);
   
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
   Variable-&gt;addIncoming(StartVal, PreheaderBB);
   
   // Within the loop, the variable is defined equal to the PHI node.  If it
@@ -1758,7 +1758,7 @@ int main() {
 
   // Create the JIT.  This takes ownership of the module.
   std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
   if (!TheExecutionEngine) {
     fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
     exit(1);
@@ -1810,8 +1810,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index 3b36129..a4a21f1 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: Mutable Variables</div>
+<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -38,10 +38,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 7 Introduction</a></div>
+<h2><a name="intro">Chapter 7 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  In chapters 1 through 6, we've built a very
@@ -66,10 +66,10 @@ support for this, though the way it works is a bit unexpected for some.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="why">Why is this a hard problem?</a></div>
+<h2><a name="why">Why is this a hard problem?</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 To understand why mutable variables cause complexities in SSA construction, 
@@ -140,10 +140,10 @@ logic.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="memory">Memory in LLVM</a></div>
+<h2><a name="memory">Memory in LLVM</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The 'trick' here is that while LLVM does require all register values to be
 in SSA form, it does not require (or permit) memory objects to be in SSA form.
@@ -321,11 +321,10 @@ variables now!
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="kalvars">Mutable Variables in 
-Kaleidoscope</a></div>
+<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we know the sort of problem we want to tackle, lets see what this
 looks like in the context of our little Kaleidoscope language.  We're going to
@@ -378,11 +377,10 @@ Kaleidoscope to support new variable definitions.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="adjustments">Adjusting Existing Variables for
-Mutation</a></div>
+<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The symbol table in Kaleidoscope is managed at code generation time by the 
@@ -648,10 +646,10 @@ we'll add the assignment operator.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="assignment">New Assignment Operator</a></div>
+<h2><a name="assignment">New Assignment Operator</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>With our current framework, adding a new assignment operator is really
 simple.  We will parse it just like any other binary operator, but handle it
@@ -745,11 +743,10 @@ add this next!
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="localvars">User-defined Local 
-Variables</a></div>
+<h2><a name="localvars">User-defined Local Variables</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Adding var/in is just like any other other extensions we made to 
 Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
@@ -979,10 +976,10 @@ anywhere in sight.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with mutable
@@ -1755,7 +1752,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN-&gt;addIncoming(ThenV, ThenBB);
@@ -2160,8 +2157,8 @@ int main() {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html
index fe42a22..cc55d40 100644
--- a/docs/tutorial/LangImpl8.html
+++ b/docs/tutorial/LangImpl8.html
@@ -11,8 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Conclusion and other useful LLVM
- tidbits</div>
+<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -43,10 +42,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="conclusion">Tutorial Conclusion</a></div>
+<h2><a name="conclusion">Tutorial Conclusion</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to the the final chapter of the "<a href="index.html">Implementing a
 language with LLVM</a>" tutorial.  In the course of this tutorial, we have grown
@@ -154,23 +153,19 @@ are very useful if you want to take advantage of LLVM's capabilities.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="llvmirproperties">Properties of the LLVM 
-IR</a></div>
+<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>We have a couple common questions about code in the LLVM IR form - lets just
 get these out of the way right now, shall we?</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="targetindep">Target 
-Independence</a></div>
+<h4><a name="targetindep">Target Independence</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Kaleidoscope is an example of a "portable language": any program written in
 Kaleidoscope will work the same way on any target that it runs on.  Many other
@@ -221,10 +216,10 @@ in-kernel language.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="safety">Safety Guarantees</a></div>
+<h4><a name="safety">Safety Guarantees</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Many of the languages above are also "safe" languages: it is impossible for
 a program written in Java to corrupt its address space and crash the process
@@ -243,11 +238,10 @@ list</a> if you are interested in more details.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="langspecific">Language-Specific 
-Optimizations</a></div>
+<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>One thing about LLVM that turns off many people is that it does not solve all
 the world's problems in one system (sorry 'world hunger', someone else will have
@@ -297,24 +291,23 @@ language-specific AST.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="tipsandtricks">Tips and Tricks</a></div>
+<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>There is a variety of useful tips and tricks that you come to know after
 working on/with LLVM that aren't obvious at first glance.  Instead of letting
 everyone rediscover them, this section talks about some of these issues.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="offsetofsizeof">Implementing portable
-offsetof/sizeof</a></div>
+<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>One interesting thing that comes up, if you are trying to keep the code 
 generated by your compiler "target independent", is that you often need to know
@@ -331,11 +324,10 @@ in a portable way.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="gcstack">Garbage Collected 
-Stack Frames</a></div>
+<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Some languages want to explicitly manage their stack frames, often so that
 they are garbage collected or to allow easy implementation of closures.  There
@@ -349,6 +341,8 @@ Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -358,8 +352,8 @@ Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html
index 4410613..7cae68c 100644
--- a/docs/tutorial/OCamlLangImpl1.html
+++ b/docs/tutorial/OCamlLangImpl1.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Tutorial Introduction and the Lexer</div>
+<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -35,10 +35,10 @@ AST</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Tutorial Introduction</a></div>
+<h2><a name="intro">Tutorial Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to the "Implementing a language with LLVM" tutorial.  This tutorial
 runs through the implementation of a simple language, showing how fun and
@@ -130,10 +130,10 @@ languages!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="language">The Basic Language</a></div>
+<h2><a name="language">The Basic Language</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>This tutorial will be illustrated with a toy language that we'll call
 "<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived
@@ -188,10 +188,10 @@ a Mandelbrot Set</a> at various levels of magnification.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="lexer">The Lexer</a></div>
+<h2><a name="lexer">The Lexer</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>When it comes to implementing a language, the first thing needed is
 the ability to process a text file and recognize what it says.  The traditional
@@ -358,8 +358,8 @@ include a driver so that you can use the lexer and parser together.
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html
index 41d0956..e1bb871 100644
--- a/docs/tutorial/OCamlLangImpl2.html
+++ b/docs/tutorial/OCamlLangImpl2.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Implementing a Parser and AST</div>
+<h1>Kaleidoscope: Implementing a Parser and AST</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -40,10 +40,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 2 Introduction</a></div>
+<h2><a name="intro">Chapter 2 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
 with LLVM in Objective Caml</a>" tutorial.  This chapter shows you how to use
@@ -65,10 +65,10 @@ Tree.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="ast">The Abstract Syntax Tree (AST)</a></div>
+<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The AST for a program captures its behavior in such a way that it is easy for
 later stages of the compiler (e.g. code generation) to interpret.  We basically
@@ -146,10 +146,10 @@ bodies in Kaleidoscope.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserbasics">Parser Basics</a></div>
+<h2><a name="parserbasics">Parser Basics</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have an AST to build, we need to define the parser code to build
 it.  The idea here is that we want to parse something like "x+y" (which is
@@ -181,11 +181,10 @@ piece of our grammar: numeric literals.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserprimexprs">Basic Expression
- Parsing</a></div>
+<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>We start with numeric literals, because they are the simplest to process.
 For each production in our grammar, we'll define a function which parses that
@@ -303,11 +302,10 @@ They are a bit more complex.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parserbinops">Binary Expression
- Parsing</a></div>
+<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Binary expressions are significantly harder to parse because they are often
 ambiguous.  For example, when given the string "x+y*z", the parser can choose
@@ -517,10 +515,10 @@ handle function definitions, etc.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="parsertop">Parsing the Rest</a></div>
+<h2><a name="parsertop">Parsing the Rest</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The next thing missing is handling of function prototypes.  In Kaleidoscope,
@@ -596,10 +594,10 @@ actually <em>execute</em> this code we've built!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="driver">The Driver</a></div>
+<h2><a name="driver">The Driver</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The driver for this simply invokes all of the parsing pieces with a top-level
 dispatch loop.  There isn't much interesting here, so I'll just include the
@@ -652,10 +650,10 @@ type "4+5;", and the parser will know you are done.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="conclusions">Conclusions</a></div>
+<h2><a name="conclusions">Conclusions</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>With just under 300 lines of commented code (240 lines of non-comment,
 non-blank code), we fully defined our minimal language, including a lexer,
@@ -689,10 +687,10 @@ Representation (IR) from the AST.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for this and the previous chapter.
@@ -1038,8 +1036,8 @@ main ()
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a>
   <a href="mailto:erickt@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index c7c5370..e52bb6c 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Code generation to LLVM IR</div>
+<h1>Kaleidoscope: Code generation to LLVM IR</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -38,10 +38,10 @@ Support</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 3 Introduction</a></div>
+<h2><a name="intro">Chapter 3 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  This chapter shows you how to transform the <a
@@ -57,10 +57,10 @@ LLVM SVN to work.  LLVM 2.2 and before will not work with it.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="basics">Code Generation Setup</a></div>
+<h2><a name="basics">Code Generation Setup</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 In order to generate LLVM IR, we want some simple setup to get started.  First
@@ -128,10 +128,10 @@ that this has already been done, and we'll just use it to emit code.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="exprs">Expression Code Generation</a></div>
+<h2><a name="exprs">Expression Code Generation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Generating LLVM code for expression nodes is very straightforward: less
 than 30 lines of commented code for all four of our expression nodes.  First
@@ -263,10 +263,10 @@ basic framework.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="funcs">Function Code Generation</a></div>
+<h2><a name="funcs">Function Code Generation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Code generation for prototypes and functions must handle a number of
 details, which make their code less beautiful than expression code
@@ -466,11 +466,10 @@ def bar() foo(1, 2); # error, unknown function "foo"
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="driver">Driver Changes and
-Closing Thoughts</a></div>
+<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 For now, code generation to LLVM doesn't really get us much, except that we can
@@ -607,10 +606,10 @@ support</a> to this so we can actually start running code!</p>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1086,8 +1085,8 @@ main ()
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index a86184c..db164d5 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Adding JIT and Optimizer Support</div>
+<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -37,10 +37,10 @@ Flow</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 4 Introduction</a></div>
+<h2><a name="intro">Chapter 4 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  Chapters 1-3 described the implementation of a simple
@@ -52,11 +52,10 @@ for the Kaleidoscope language.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="trivialconstfold">Trivial Constant
-Folding</a></div>
+<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p><b>Note:</b> the default <tt>IRBuilder</tt> now always includes the constant 
 folding optimisations below.<p>
@@ -148,11 +147,10 @@ range of optimizations that you can use, in the form of "passes".</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="optimizerpasses">LLVM Optimization
- Passes</a></div>
+<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>LLVM provides many optimization passes, which do many different sorts of
 things and have different tradeoffs.  Unlike other systems, LLVM doesn't hold
@@ -283,10 +281,10 @@ executing it!</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="jit">Adding a JIT Compiler</a></div>
+<h2><a name="jit">Adding a JIT Compiler</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Code that is available in LLVM IR can have a wide variety of tools
 applied to it.  For example, you can run optimizations on it (as we did above),
@@ -486,10 +484,10 @@ constructs</a>, tackling some interesting LLVM IR issues along the way.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1022,8 +1020,8 @@ extern double putchard(double X) {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
index 3173803..ca79691 100644
--- a/docs/tutorial/OCamlLangImpl5.html
+++ b/docs/tutorial/OCamlLangImpl5.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: Control Flow</div>
+<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -52,10 +52,10 @@ User-defined Operators</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 5 Introduction</a></div>
+<h2><a name="intro">Chapter 5 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  Parts 1-4 described the implementation of the simple
@@ -69,10 +69,10 @@ have an if/then/else expression plus a simple 'for' loop.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="ifthen">If/Then/Else</a></div>
+<h2><a name="ifthen">If/Then/Else</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Extending Kaleidoscope to support if/then/else is quite straightforward.  It
@@ -112,15 +112,12 @@ Since Kaleidoscope allows side-effects, this behavior is important to nail down.
 <p>Now that we know what we "want", lets break this down into its constituent
 pieces.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="iflexer">Lexer Extensions for
-If/Then/Else</a></div>
+<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
 
-<div class="doc_text">
+<div>
 
 <p>The lexer extensions are straightforward.  First we add new variants
 for the relevant tokens:</p>
@@ -153,11 +150,10 @@ stuff:</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifast">AST Extensions for
- If/Then/Else</a></div>
+<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>To represent the new expression we add a new AST variant for it:</p>
 
@@ -175,11 +171,10 @@ type expr =
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifparser">Parser Extensions for
-If/Then/Else</a></div>
+<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have the relevant tokens coming from the lexer and we have the
 AST node to build, our parsing logic is relatively straightforward.  First we
@@ -214,10 +209,10 @@ let rec parse_primary = parser
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifir">LLVM IR for If/Then/Else</a></div>
+<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we have it parsing and building the AST, the final piece is adding
 LLVM code generation support.  This is the most interesting part of the
@@ -331,11 +326,10 @@ directly.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="ifcodegen">Code Generation for
-If/Then/Else</a></div>
+<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>In order to generate code for this, we implement the <tt>Codegen</tt> method
 for <tt>IfExprAST</tt>:</p>
@@ -492,11 +486,13 @@ another useful expression that is familiar from non-functional languages...</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="for">'for' Loop Expression</a></div>
+<h2><a name="for">'for' Loop Expression</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we know how to add basic control flow constructs to the language,
 we have the tools to add more powerful things.  Lets add something more
@@ -525,14 +521,11 @@ variables, it will get more useful.</p>
 <p>As before, lets talk about the changes that we need to Kaleidoscope to
 support this.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forlexer">Lexer Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The lexer extensions are the same sort of thing as for if/then/else:</p>
 
@@ -559,11 +552,10 @@ the 'for' Loop</a></div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forast">AST Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The AST variant is just as simple.  It basically boils down to capturing
 the variable name and the constituent expressions in the node.</p>
@@ -580,11 +572,10 @@ type expr =
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forparser">Parser Extensions for
-the 'for' Loop</a></div>
+<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The parser code is also fairly standard.  The only interesting thing here is
 handling of the optional step value.  The parser code handles it by checking to
@@ -628,11 +619,10 @@ let rec parse_primary = parser
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forir">LLVM IR for
-the 'for' Loop</a></div>
+<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Now we get to the good part: the LLVM IR we want to generate for this thing.
 With the simple example above, we get this LLVM IR (note that this dump is
@@ -674,11 +664,10 @@ expressions, and some basic blocks.  Lets see how this fits together.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="forcodegen">Code Generation for
-the 'for' Loop</a></div>
+<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>The first part of Codegen is very simple: we just output the start expression
 for the loop value:</p>
@@ -851,11 +840,13 @@ to our poor innocent language.</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1562,8 +1553,8 @@ operators</a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index 1d4f8c7..bde429b 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: User-defined Operators</div>
+<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -38,10 +38,10 @@ Variables / SSA Construction</li>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 6 Introduction</a></div>
+<h2><a name="intro">Chapter 6 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  At this point in our tutorial, we now have a fully
@@ -64,10 +64,10 @@ an example of what you can build with Kaleidoscope and its feature set.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="idea">User-defined Operators: the Idea</a></div>
+<h2><a name="idea">User-defined Operators: the Idea</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The "operator overloading" that we will add to Kaleidoscope is more general than
@@ -129,10 +129,10 @@ operators.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="binary">User-defined Binary Operators</a></div>
+<h2><a name="binary">User-defined Binary Operators</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Adding support for user-defined binary operators is pretty simple with our
 current framework.  We'll first add support for the unary/binary keywords:</p>
@@ -320,10 +320,10 @@ see what it takes.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="unary">User-defined Unary Operators</a></div>
+<h2><a name="unary">User-defined Unary Operators</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Since we don't currently support unary operators in the Kaleidoscope
 language, we'll need to add everything to support them.  Above, we added simple
@@ -472,10 +472,10 @@ is simpler primarily because it doesn't need to handle any predefined operators.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="example">Kicking the Tires</a></div>
+<h2><a name="example">Kicking the Tires</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>It is somewhat hard to believe, but with a few simple extensions we've
 covered in the last chapters, we have grown a real-ish language.  With this, we
@@ -778,10 +778,10 @@ add variable mutation without building SSA in your front-end.</p>
 
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with the
@@ -1567,8 +1567,8 @@ SSA construction</a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-06-21 22:31:30 +0200 (Mon, 21 Jun 2010) $
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index a9fcd70..a48e679 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -13,7 +13,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Extending the Language: Mutable Variables</div>
+<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -42,10 +42,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="intro">Chapter 7 Introduction</a></div>
+<h2><a name="intro">Chapter 7 Introduction</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
 with LLVM</a>" tutorial.  In chapters 1 through 6, we've built a very
@@ -70,10 +70,10 @@ support for this, though the way it works is a bit unexpected for some.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="why">Why is this a hard problem?</a></div>
+<h2><a name="why">Why is this a hard problem?</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 To understand why mutable variables cause complexities in SSA construction,
@@ -144,10 +144,10 @@ logic.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="memory">Memory in LLVM</a></div>
+<h2><a name="memory">Memory in LLVM</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>The 'trick' here is that while LLVM does require all register values to be
 in SSA form, it does not require (or permit) memory objects to be in SSA form.
@@ -325,11 +325,10 @@ variables now!
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="kalvars">Mutable Variables in
-Kaleidoscope</a></div>
+<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Now that we know the sort of problem we want to tackle, lets see what this
 looks like in the context of our little Kaleidoscope language.  We're going to
@@ -382,11 +381,10 @@ Kaleidoscope to support new variable definitions.
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="adjustments">Adjusting Existing Variables for
-Mutation</a></div>
+<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 The symbol table in Kaleidoscope is managed at code generation time by the
@@ -672,10 +670,10 @@ we'll add the assignment operator.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="assignment">New Assignment Operator</a></div>
+<h2><a name="assignment">New Assignment Operator</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>With our current framework, adding a new assignment operator is really
 simple.  We will parse it just like any other binary operator, but handle it
@@ -773,11 +771,10 @@ add this next!
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="localvars">User-defined Local
-Variables</a></div>
+<h2><a name="localvars">User-defined Local Variables</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Adding var/in is just like any other other extensions we made to
 Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
@@ -956,10 +953,10 @@ anywhere in sight.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="code">Full Code Listing</a></div>
+<h2><a name="code">Full Code Listing</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>
 Here is the complete code listing for our running example, enhanced with mutable
@@ -1887,7 +1884,7 @@ extern double printd(double X) {
 </dd>
 </dl>
 
-<a href="LangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
+<a href="OCamlLangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
 </div>
 
 <!-- *********************************************************************** -->
@@ -1899,9 +1896,9 @@ extern double printd(double X) {
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  Last modified: $Date: 2011-01-01 04:27:43 +0100 (Sat, 01 Jan 2011) $
+  Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html
index 64a6200..eed8c03 100644
--- a/docs/tutorial/OCamlLangImpl8.html
+++ b/docs/tutorial/OCamlLangImpl8.html
@@ -11,8 +11,7 @@
 
 <body>
 
-<div class="doc_title">Kaleidoscope: Conclusion and other useful LLVM
- tidbits</div>
+<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
 
 <ul>
 <li><a href="index.html">Up to Tutorial Index</a></li>
@@ -43,10 +42,10 @@
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="conclusion">Tutorial Conclusion</a></div>
+<h2><a name="conclusion">Tutorial Conclusion</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>Welcome to the the final chapter of the "<a href="index.html">Implementing a
 language with LLVM</a>" tutorial.  In the course of this tutorial, we have grown
@@ -154,23 +153,19 @@ are very useful if you want to take advantage of LLVM's capabilities.</p>
 </div>
 
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="llvmirproperties">Properties of the LLVM 
-IR</a></div>
+<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>We have a couple common questions about code in the LLVM IR form - lets just
 get these out of the way right now, shall we?</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="targetindep">Target 
-Independence</a></div>
+<h4><a name="targetindep">Target Independence</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Kaleidoscope is an example of a "portable language": any program written in
 Kaleidoscope will work the same way on any target that it runs on.  Many other
@@ -221,10 +216,10 @@ in-kernel language.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="safety">Safety Guarantees</a></div>
+<h4><a name="safety">Safety Guarantees</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Many of the languages above are also "safe" languages: it is impossible for
 a program written in Java to corrupt its address space and crash the process
@@ -243,11 +238,10 @@ list</a> if you are interested in more details.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="langspecific">Language-Specific 
-Optimizations</a></div>
+<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>One thing about LLVM that turns off many people is that it does not solve all
 the world's problems in one system (sorry 'world hunger', someone else will have
@@ -297,24 +291,23 @@ language-specific AST.
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
-<div class="doc_section"><a name="tipsandtricks">Tips and Tricks</a></div>
+<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
 <!-- *********************************************************************** -->
 
-<div class="doc_text">
+<div>
 
 <p>There is a variety of useful tips and tricks that you come to know after
 working on/with LLVM that aren't obvious at first glance.  Instead of letting
 everyone rediscover them, this section talks about some of these issues.</p>
 
-</div>
-
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="offsetofsizeof">Implementing portable
-offsetof/sizeof</a></div>
+<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>One interesting thing that comes up, if you are trying to keep the code 
 generated by your compiler "target independent", is that you often need to know
@@ -331,11 +324,10 @@ in a portable way.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsubsection"><a name="gcstack">Garbage Collected 
-Stack Frames</a></div>
+<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
 <!-- ======================================================================= -->
 
-<div class="doc_text">
+<div>
 
 <p>Some languages want to explicitly manage their stack frames, often so that
 they are garbage collected or to allow easy implementation of closures.  There
@@ -349,6 +341,8 @@ Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
 
 </div>
 
+</div>
+
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -358,7 +352,7 @@ Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
   Last modified: $Date$
 </address>
 </body>
diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html
index 11dd5e2..0a8cae2 100644
--- a/docs/tutorial/index.html
+++ b/docs/tutorial/index.html
@@ -12,7 +12,7 @@
 
 <body>
 
-<div class="doc_title"> LLVM Tutorial: Table of Contents </div>
+<h1>LLVM Tutorial: Table of Contents</h1>
 
 <ol>
   <li>Kaleidoscope: Implementing a Language with LLVM
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index 8536915..54f3553 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -294,8 +294,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
           // Make part of PHI instruction now, wait until end of loop to finish
           PHINode *phi_0 =
             PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)),
-                            headreg, testbb);
-          phi_0->reserveOperandSpace(2);
+                            2, headreg, testbb);
           phi_0->addIncoming(curhead, bb_0);
           curhead = phi_0;
 
@@ -449,8 +448,8 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
 
       //%head.%d = phi i8 *[%head.%d, %main.%d]
       PHINode *phi_1 = builder->
-        CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg);
-      phi_1->reserveOperandSpace(1);
+        CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), 1,
+                  headreg);
       phi_1->addIncoming(head_0, testbb);
       curhead = phi_1;
     }
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 95ccd24..e5bd377 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -1,12 +1,11 @@
-//===-- examples/ExceptionDemo/ExceptionDemo.cpp - 
-//                      An example use of the llvm Exception mechanism --===//
+//===-- ExceptionDemo.cpp - An example using llvm Exceptions --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
-//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Demo program which implements an example LLVM exception implementation, and
 // shows several test cases including the handling of foreign exceptions.
@@ -46,8 +45,7 @@
 // This code uses code from the llvm compiler-rt project and the llvm 
 // Kaleidoscope project.
 //
-//===--------------------------------------------------------------------===//
-
+//===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
 #include "llvm/DerivedTypes.h"
@@ -64,11 +62,17 @@
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/Dwarf.h"
 
+// FIXME: Although all systems tested with (Linux, OS X), do not need this 
+//        header file included. A user on ubuntu reported, undefined symbols 
+//        for stderr, and fprintf, and the addition of this include fixed the
+//        issue for them. Given that LLVM's best practices include the goal 
+//        of reducing the number of redundant header files included, the 
+//        correct solution would be to find out why these symbols are not 
+//        defined for the system in question, and fix the issue by finding out
+//        which LLVM header file, if any, would include these symbols.
 #include <cstdio>
-#include <string>
+
 #include <sstream>
-#include <map>
-#include <vector>
 #include <stdexcept>
 
 
@@ -80,8 +84,8 @@
 //     http://refspecs.freestandards.org/abi-eh-1.21.html
 
 extern "C" {
-
-typedef enum {
+  
+  typedef enum {
     _URC_NO_REASON = 0,
     _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
     _URC_FATAL_PHASE2_ERROR = 2,
@@ -91,43 +95,43 @@ typedef enum {
     _URC_HANDLER_FOUND = 6,
     _URC_INSTALL_CONTEXT = 7,
     _URC_CONTINUE_UNWIND = 8
-} _Unwind_Reason_Code;
-
-typedef enum {
+  } _Unwind_Reason_Code;
+  
+  typedef enum {
     _UA_SEARCH_PHASE = 1,
     _UA_CLEANUP_PHASE = 2,
     _UA_HANDLER_FRAME = 4,
     _UA_FORCE_UNWIND = 8,
     _UA_END_OF_STACK = 16
-} _Unwind_Action;
-
-struct _Unwind_Exception;
-
-typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
-                                              struct _Unwind_Exception *);
-
-struct _Unwind_Exception {
+  } _Unwind_Action;
+  
+  struct _Unwind_Exception;
+  
+  typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
+                                                struct _Unwind_Exception *);
+  
+  struct _Unwind_Exception {
     uint64_t exception_class;
     _Unwind_Exception_Cleanup_Fn exception_cleanup;
-
+    
     uintptr_t private_1;    
     uintptr_t private_2;    
-
+    
     // @@@ The IA-64 ABI says that this structure must be double-word aligned.
     //  Taking that literally does not make much sense generically.  Instead 
     //  we provide the maximum alignment required by any type for the machine.
-} __attribute__((__aligned__));
-
-struct _Unwind_Context;
-typedef struct _Unwind_Context* _Unwind_Context_t;
-
-extern const uint8_t* _Unwind_GetLanguageSpecificData (_Unwind_Context_t c);
-extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i);
-extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n);
-extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value);
-extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context);
-extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context);
-
+  } __attribute__((__aligned__));
+  
+  struct _Unwind_Context;
+  typedef struct _Unwind_Context *_Unwind_Context_t;
+  
+  extern const uint8_t *_Unwind_GetLanguageSpecificData (_Unwind_Context_t c);
+  extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i);
+  extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n);
+  extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value);
+  extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context);
+  extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context);
+  
 } // extern "C"
 
 //
@@ -136,8 +140,8 @@ extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context);
 
 /// This is our simplistic type info
 struct OurExceptionType_t {
-    /// type info type
-    int type;
+  /// type info type
+  int type;
 };
 
 
@@ -148,10 +152,10 @@ struct OurExceptionType_t {
 ///       on a double word boundary. This is necessary to match the standard:
 ///       http://refspecs.freestandards.org/abi-eh-1.21.html
 struct OurBaseException_t {
-    struct OurExceptionType_t type;
-
-    // Note: This is properly aligned in unwind.h
-    struct _Unwind_Exception unwindException;
+  struct OurExceptionType_t type;
+  
+  // Note: This is properly aligned in unwind.h
+  struct _Unwind_Exception unwindException;
 };
 
 
@@ -169,7 +173,7 @@ static std::map<std::string, llvm::Value*> namedValues;
 int64_t ourBaseFromUnwindOffset;
 
 const unsigned char ourBaseExcpClassChars[] = 
-                                {'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'};
+{'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'};
 
 
 static uint64_t ourBaseExceptionClass = 0;
@@ -177,13 +181,13 @@ static uint64_t ourBaseExceptionClass = 0;
 static std::vector<std::string> ourTypeInfoNames;
 static std::map<int, std::string> ourTypeInfoNamesIndex;
 
-static llvm::StructType* ourTypeInfoType;
-static llvm::StructType* ourExceptionType;
-static llvm::StructType* ourUnwindExceptionType;
+static llvm::StructType *ourTypeInfoType;
+static llvm::StructType *ourExceptionType;
+static llvm::StructType *ourUnwindExceptionType;
 
-static llvm::ConstantInt* ourExceptionNotThrownState;
-static llvm::ConstantInt* ourExceptionThrownState;
-static llvm::ConstantInt* ourExceptionCaughtState;
+static llvm::ConstantInt *ourExceptionNotThrownState;
+static llvm::ConstantInt *ourExceptionThrownState;
+static llvm::ConstantInt *ourExceptionCaughtState;
 
 typedef std::vector<std::string> ArgNames;
 typedef std::vector<const llvm::Type*> ArgTypes;
@@ -204,35 +208,32 @@ typedef std::vector<const llvm::Type*> ArgTypes;
 /// @param declarationOnly for function declarations
 /// @param isVarArg function uses vararg arguments
 /// @returns function instance
-llvm::Function *createFunction(llvm::Module& module,
-                               const llvm::Type* retType,
-                               const ArgTypes& theArgTypes,
-                               const ArgNames& theArgNames,
-                               const std::string& functName,
+llvm::Function *createFunction(llvm::Module &module,
+                               const llvm::Type *retType,
+                               const ArgTypes &theArgTypes,
+                               const ArgNames &theArgNames,
+                               const std::string &functName,
                                llvm::GlobalValue::LinkageTypes linkage,
                                bool declarationOnly,
                                bool isVarArg) {
-    llvm::FunctionType* functType = llvm::FunctionType::get(retType, 
-                                                            theArgTypes, 
-                                                            isVarArg);
-    llvm::Function* ret = llvm::Function::Create(functType, 
-                                                 linkage, 
-                                                 functName, 
-                                                 &module);
-    if (!ret || declarationOnly)
-        return(ret);
-
-    namedValues.clear();
-    unsigned i = 0; 
-    for (llvm::Function::arg_iterator argIndex = ret->arg_begin();
-         i != theArgNames.size();
-         ++argIndex, ++i) {
-
-        argIndex->setName(theArgNames[i]);
-        namedValues[theArgNames[i]] = argIndex;
-    }
-
+  llvm::FunctionType *functType =
+    llvm::FunctionType::get(retType, theArgTypes, isVarArg);
+  llvm::Function *ret =
+    llvm::Function::Create(functType, linkage, functName, &module);
+  if (!ret || declarationOnly)
     return(ret);
+  
+  namedValues.clear();
+  unsigned i = 0; 
+  for (llvm::Function::arg_iterator argIndex = ret->arg_begin();
+       i != theArgNames.size();
+       ++argIndex, ++i) {
+    
+    argIndex->setName(theArgNames[i]);
+    namedValues[theArgNames[i]] = argIndex;
+  }
+  
+  return(ret);
 }
 
 
@@ -243,18 +244,18 @@ llvm::Function *createFunction(llvm::Module& module,
 /// @param type stack variable type
 /// @param initWith optional constant initialization value
 /// @returns AllocaInst instance
-static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function& function,
-                                            const std::string &varName,
-                                            const llvm::Type* type,
-                                            llvm::Constant* initWith = NULL) {
-    llvm::BasicBlock& block = function.getEntryBlock(); 
-    llvm::IRBuilder<> tmp(&block, block.begin());
-    llvm::AllocaInst* ret = tmp.CreateAlloca(type, 0, varName.c_str());
-
-    if (initWith) 
-        tmp.CreateStore(initWith, ret);
-
-    return(ret);
+static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function,
+                                                const std::string &varName,
+                                                const llvm::Type *type,
+                                                llvm::Constant *initWith = 0) {
+  llvm::BasicBlock &block = function.getEntryBlock(); 
+  llvm::IRBuilder<> tmp(&block, block.begin());
+  llvm::AllocaInst *ret = tmp.CreateAlloca(type, 0, varName.c_str());
+  
+  if (initWith) 
+    tmp.CreateStore(initWith, ret);
+  
+  return(ret);
 }
 
 
@@ -274,15 +275,15 @@ extern "C" {
 /// Prints a 32 bit number, according to the format, to stderr.
 /// @param intToPrint integer to print 
 /// @param format printf like format to use when printing
-void print32Int(int intToPrint, const char* format) {
-    if (format) {
-        // Note: No NULL check
-        fprintf(stderr, format, intToPrint);
-    }
-    else {
-        // Note: No NULL check
-        fprintf(stderr, "::print32Int(...):NULL arg.\n");
-    }
+void print32Int(int intToPrint, const char *format) {
+  if (format) {
+    // Note: No NULL check
+    fprintf(stderr, format, intToPrint);
+  }
+  else {
+    // Note: No NULL check
+    fprintf(stderr, "::print32Int(...):NULL arg.\n");
+  }
 }
 
 
@@ -291,27 +292,27 @@ void print32Int(int intToPrint, const char* format) {
 /// Prints a 64 bit number, according to the format, to stderr.
 /// @param intToPrint integer to print 
 /// @param format printf like format to use when printing
-void print64Int(long int intToPrint, const char* format) {
-    if (format) {
-        // Note: No NULL check
-        fprintf(stderr, format, intToPrint);
-    }
-    else {
-        // Note: No NULL check
-        fprintf(stderr, "::print64Int(...):NULL arg.\n");
-    }
+void print64Int(long int intToPrint, const char *format) {
+  if (format) {
+    // Note: No NULL check
+    fprintf(stderr, format, intToPrint);
+  }
+  else {
+    // Note: No NULL check
+    fprintf(stderr, "::print64Int(...):NULL arg.\n");
+  }
 }
 
 
 /// Prints a C string to stderr
 /// @param toPrint string to print
-void printStr(char* toPrint) {
-    if (toPrint) {
-        fprintf(stderr, "%s", toPrint);
-    }
-    else {
-        fprintf(stderr, "::printStr(...):NULL arg.\n");
-    }
+void printStr(char *toPrint) {
+  if (toPrint) {
+    fprintf(stderr, "%s", toPrint);
+  }
+  else {
+    fprintf(stderr, "::printStr(...):NULL arg.\n");
+  }
 }
 
 
@@ -319,17 +320,17 @@ void printStr(char* toPrint) {
 /// is calculated from the supplied OurBaseException_t::unwindException
 /// member address. Handles (ignores), NULL pointers.
 /// @param expToDelete exception to delete
-void deleteOurException(OurUnwindException* expToDelete) {
+void deleteOurException(OurUnwindException *expToDelete) {
 #ifdef DEBUG
-    fprintf(stderr,
-            "deleteOurException(...).\n");
+  fprintf(stderr,
+          "deleteOurException(...).\n");
 #endif
-
-    if (expToDelete &&
-        (expToDelete->exception_class == ourBaseExceptionClass)) {
-
-        free(((char*) expToDelete) + ourBaseFromUnwindOffset);
-    }
+  
+  if (expToDelete &&
+      (expToDelete->exception_class == ourBaseExceptionClass)) {
+    
+    free(((char*) expToDelete) + ourBaseFromUnwindOffset);
+  }
 }
 
 
@@ -340,27 +341,27 @@ void deleteOurException(OurUnwindException* expToDelete) {
 /// @unlink
 /// @param expToDelete exception instance to delete
 void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
-                                  OurUnwindException* expToDelete) {
+                                  OurUnwindException *expToDelete) {
 #ifdef DEBUG
-    fprintf(stderr,
-            "deleteFromUnwindOurException(...).\n");
+  fprintf(stderr,
+          "deleteFromUnwindOurException(...).\n");
 #endif
-
-    deleteOurException(expToDelete);
+  
+  deleteOurException(expToDelete);
 }
 
 
 /// Creates (allocates on the heap), an exception (OurException instance),
 /// of the supplied type info type.
 /// @param type type info type
-OurUnwindException* createOurException(int type) {
-    size_t size = sizeof(OurException);
-    OurException* ret = (OurException*) memset(malloc(size), 0, size);
-    (ret->type).type = type;
-    (ret->unwindException).exception_class = ourBaseExceptionClass;
-    (ret->unwindException).exception_cleanup = deleteFromUnwindOurException;
-
-    return(&(ret->unwindException));
+OurUnwindException *createOurException(int type) {
+  size_t size = sizeof(OurException);
+  OurException *ret = (OurException*) memset(malloc(size), 0, size);
+  (ret->type).type = type;
+  (ret->unwindException).exception_class = ourBaseExceptionClass;
+  (ret->unwindException).exception_cleanup = deleteFromUnwindOurException;
+  
+  return(&(ret->unwindException));
 }
 
 
@@ -369,22 +370,22 @@ OurUnwindException* createOurException(int type) {
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// @param data reference variable holding memory pointer to decode from
 /// @returns decoded value
-static uintptr_t readULEB128(const uint8_t** data) {
-    uintptr_t result = 0;
-    uintptr_t shift = 0;
-    unsigned char byte;
-    const uint8_t* p = *data;
-
-    do {
-        byte = *p++;
-        result |= (byte & 0x7f) << shift;
-        shift += 7;
-    } 
-    while (byte & 0x80);
-
-    *data = p;
-
-    return result;
+static uintptr_t readULEB128(const uint8_t **data) {
+  uintptr_t result = 0;
+  uintptr_t shift = 0;
+  unsigned char byte;
+  const uint8_t *p = *data;
+  
+  do {
+    byte = *p++;
+    result |= (byte & 0x7f) << shift;
+    shift += 7;
+  } 
+  while (byte & 0x80);
+  
+  *data = p;
+  
+  return result;
 }
 
 
@@ -393,26 +394,26 @@ static uintptr_t readULEB128(const uint8_t** data) {
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// @param data reference variable holding memory pointer to decode from
 /// @returns decoded value
-static uintptr_t readSLEB128(const uint8_t** data) {
-    uintptr_t result = 0;
-    uintptr_t shift = 0;
-    unsigned char byte;
-    const uint8_t* p = *data;
-
-    do {
-        byte = *p++;
-        result |= (byte & 0x7f) << shift;
-        shift += 7;
-    } 
-    while (byte & 0x80);
-
-    *data = p;
-
-    if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
-        result |= (~0 << shift);
-    }
-
-    return result;
+static uintptr_t readSLEB128(const uint8_t **data) {
+  uintptr_t result = 0;
+  uintptr_t shift = 0;
+  unsigned char byte;
+  const uint8_t *p = *data;
+  
+  do {
+    byte = *p++;
+    result |= (byte & 0x7f) << shift;
+    shift += 7;
+  } 
+  while (byte & 0x80);
+  
+  *data = p;
+  
+  if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
+    result |= (~0 << shift);
+  }
+  
+  return result;
 }
 
 
@@ -422,82 +423,82 @@ static uintptr_t readSLEB128(const uint8_t** data) {
 /// @param data reference variable holding memory pointer to decode from
 /// @param encoding dwarf encoding type
 /// @returns decoded value
-static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) {
-    uintptr_t result = 0;
-    const uint8_t* p = *data;
-
-    if (encoding == llvm::dwarf::DW_EH_PE_omit) 
-        return(result);
-
-    // first get value 
-    switch (encoding & 0x0F) {
-        case llvm::dwarf::DW_EH_PE_absptr:
-            result = *((uintptr_t*)p);
-            p += sizeof(uintptr_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_uleb128:
-            result = readULEB128(&p);
-            break;
-        // Note: This case has not been tested
-        case llvm::dwarf::DW_EH_PE_sleb128:
-            result = readSLEB128(&p);
-            break;
-        case llvm::dwarf::DW_EH_PE_udata2:
-            result = *((uint16_t*)p);
-            p += sizeof(uint16_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_udata4:
-            result = *((uint32_t*)p);
-            p += sizeof(uint32_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_udata8:
-            result = *((uint64_t*)p);
-            p += sizeof(uint64_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_sdata2:
-            result = *((int16_t*)p);
-            p += sizeof(int16_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_sdata4:
-            result = *((int32_t*)p);
-            p += sizeof(int32_t);
-            break;
-        case llvm::dwarf::DW_EH_PE_sdata8:
-            result = *((int64_t*)p);
-            p += sizeof(int64_t);
-            break;
-        default:
-            // not supported 
-            abort();
-            break;
-    }
-
-    // then add relative offset 
-    switch (encoding & 0x70) {
-        case llvm::dwarf::DW_EH_PE_absptr:
-            // do nothing 
-            break;
-        case llvm::dwarf::DW_EH_PE_pcrel:
-            result += (uintptr_t)(*data);
-            break;
-        case llvm::dwarf::DW_EH_PE_textrel:
-        case llvm::dwarf::DW_EH_PE_datarel:
-        case llvm::dwarf::DW_EH_PE_funcrel:
-        case llvm::dwarf::DW_EH_PE_aligned:
-        default:
-            // not supported 
-            abort();
-            break;
-    }
-
-    // then apply indirection 
-    if (encoding & llvm::dwarf::DW_EH_PE_indirect) {
-        result = *((uintptr_t*)result);
-    }
-
-    *data = p;
-
-    return result;
+static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
+  uintptr_t result = 0;
+  const uint8_t *p = *data;
+  
+  if (encoding == llvm::dwarf::DW_EH_PE_omit) 
+    return(result);
+  
+  // first get value 
+  switch (encoding & 0x0F) {
+    case llvm::dwarf::DW_EH_PE_absptr:
+      result = *((uintptr_t*)p);
+      p += sizeof(uintptr_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_uleb128:
+      result = readULEB128(&p);
+      break;
+      // Note: This case has not been tested
+    case llvm::dwarf::DW_EH_PE_sleb128:
+      result = readSLEB128(&p);
+      break;
+    case llvm::dwarf::DW_EH_PE_udata2:
+      result = *((uint16_t*)p);
+      p += sizeof(uint16_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_udata4:
+      result = *((uint32_t*)p);
+      p += sizeof(uint32_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_udata8:
+      result = *((uint64_t*)p);
+      p += sizeof(uint64_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_sdata2:
+      result = *((int16_t*)p);
+      p += sizeof(int16_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_sdata4:
+      result = *((int32_t*)p);
+      p += sizeof(int32_t);
+      break;
+    case llvm::dwarf::DW_EH_PE_sdata8:
+      result = *((int64_t*)p);
+      p += sizeof(int64_t);
+      break;
+    default:
+      // not supported 
+      abort();
+      break;
+  }
+  
+  // then add relative offset 
+  switch (encoding & 0x70) {
+    case llvm::dwarf::DW_EH_PE_absptr:
+      // do nothing 
+      break;
+    case llvm::dwarf::DW_EH_PE_pcrel:
+      result += (uintptr_t)(*data);
+      break;
+    case llvm::dwarf::DW_EH_PE_textrel:
+    case llvm::dwarf::DW_EH_PE_datarel:
+    case llvm::dwarf::DW_EH_PE_funcrel:
+    case llvm::dwarf::DW_EH_PE_aligned:
+    default:
+      // not supported 
+      abort();
+      break;
+  }
+  
+  // then apply indirection 
+  if (encoding & llvm::dwarf::DW_EH_PE_indirect) {
+    result = *((uintptr_t*)result);
+  }
+  
+  *data = p;
+  
+  return result;
 }
 
 
@@ -524,74 +525,74 @@ static bool handleActionValue(int64_t *resultAction,
                               uintptr_t actionEntry, 
                               uint64_t exceptionClass, 
                               struct _Unwind_Exception *exceptionObject) {
-    bool ret = false;
-
-    if (!resultAction || 
-        !exceptionObject || 
-        (exceptionClass != ourBaseExceptionClass))
-        return(ret);
-
-    struct OurBaseException_t* excp = (struct OurBaseException_t*)
-                        (((char*) exceptionObject) + ourBaseFromUnwindOffset);
-    struct OurExceptionType_t *excpType = &(excp->type);
-    int type = excpType->type;
-
+  bool ret = false;
+  
+  if (!resultAction || 
+      !exceptionObject || 
+      (exceptionClass != ourBaseExceptionClass))
+    return(ret);
+  
+  struct OurBaseException_t *excp = (struct OurBaseException_t*)
+  (((char*) exceptionObject) + ourBaseFromUnwindOffset);
+  struct OurExceptionType_t *excpType = &(excp->type);
+  int type = excpType->type;
+  
 #ifdef DEBUG
-    fprintf(stderr,
-            "handleActionValue(...): exceptionObject = <%p>, "
-                "excp = <%p>.\n",
-            exceptionObject,
-            excp);
+  fprintf(stderr,
+          "handleActionValue(...): exceptionObject = <%p>, "
+          "excp = <%p>.\n",
+          exceptionObject,
+          excp);
 #endif
-
-    const uint8_t *actionPos = (uint8_t*) actionEntry,
-                  *tempActionPos;
-    int64_t typeOffset = 0,
-            actionOffset;
-
-    for (int i = 0; true; ++i) {
-        // Each emitted dwarf action corresponds to a 2 tuple of
-        // type info address offset, and action offset to the next
-        // emitted action.
-        typeOffset = readSLEB128(&actionPos);
-        tempActionPos = actionPos;
-        actionOffset = readSLEB128(&tempActionPos);
-
+  
+  const uint8_t *actionPos = (uint8_t*) actionEntry,
+  *tempActionPos;
+  int64_t typeOffset = 0,
+  actionOffset;
+  
+  for (int i = 0; true; ++i) {
+    // Each emitted dwarf action corresponds to a 2 tuple of
+    // type info address offset, and action offset to the next
+    // emitted action.
+    typeOffset = readSLEB128(&actionPos);
+    tempActionPos = actionPos;
+    actionOffset = readSLEB128(&tempActionPos);
+    
 #ifdef DEBUG
-        fprintf(stderr,
-                "handleActionValue(...):typeOffset: <%lld>, "
-                    "actionOffset: <%lld>.\n",
-                typeOffset,
-                actionOffset);
+    fprintf(stderr,
+            "handleActionValue(...):typeOffset: <%lld>, "
+            "actionOffset: <%lld>.\n",
+            typeOffset,
+            actionOffset);
 #endif
-        assert((typeOffset >= 0) && 
-               "handleActionValue(...):filters are not supported.");
-
-        // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector
-        //       argument has been matched.
-        if ((typeOffset > 0) &&
-            (type == (classInfo[-typeOffset])->type)) {
+    assert((typeOffset >= 0) && 
+           "handleActionValue(...):filters are not supported.");
+    
+    // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector
+    //       argument has been matched.
+    if ((typeOffset > 0) &&
+        (type == (classInfo[-typeOffset])->type)) {
 #ifdef DEBUG
-            fprintf(stderr,
-                    "handleActionValue(...):actionValue <%d> found.\n",
-                    i);
+      fprintf(stderr,
+              "handleActionValue(...):actionValue <%d> found.\n",
+              i);
 #endif
-            *resultAction = i + 1;
-            ret = true;
-            break;
-        }
-
+      *resultAction = i + 1;
+      ret = true;
+      break;
+    }
+    
 #ifdef DEBUG
-        fprintf(stderr,
-                "handleActionValue(...):actionValue not found.\n");
+    fprintf(stderr,
+            "handleActionValue(...):actionValue not found.\n");
 #endif
-        if (!actionOffset)
-            break;
-
-        actionPos += actionOffset;
-    }
-
-    return(ret);
+    if (!actionOffset)
+      break;
+    
+    actionPos += actionOffset;
+  }
+  
+  return(ret);
 }
 
 
@@ -607,180 +608,177 @@ static bool handleActionValue(int64_t *resultAction,
 /// @param context unwind system context
 /// @returns minimally supported unwinding control indicator 
 static _Unwind_Reason_Code handleLsda(int version, 
-                                  const uint8_t* lsda,
-                                  _Unwind_Action actions,
-                                  uint64_t exceptionClass, 
-                                  struct _Unwind_Exception* exceptionObject,
-                                  _Unwind_Context_t context) {
-    _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND;
-
-    if (!lsda)
-        return(ret);
-
+                                      const uint8_t *lsda,
+                                      _Unwind_Action actions,
+                                      uint64_t exceptionClass, 
+                                    struct _Unwind_Exception *exceptionObject,
+                                      _Unwind_Context_t context) {
+  _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND;
+  
+  if (!lsda)
+    return(ret);
+  
 #ifdef DEBUG
-    fprintf(stderr, 
-            "handleLsda(...):lsda is non-zero.\n");
+  fprintf(stderr, 
+          "handleLsda(...):lsda is non-zero.\n");
 #endif
-
-    // Get the current instruction pointer and offset it before next
-    // instruction in the current frame which threw the exception.
-    uintptr_t pc = _Unwind_GetIP(context)-1;
-
-    // Get beginning current frame's code (as defined by the 
-    // emitted dwarf code)
-    uintptr_t funcStart = _Unwind_GetRegionStart(context);
-    uintptr_t pcOffset = pc - funcStart;
-    struct OurExceptionType_t** classInfo = NULL;
-
-    // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding
-    //       dwarf emission
-
-    // Parse LSDA header.
-    uint8_t lpStartEncoding = *lsda++;
-
-    if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) {
-        readEncodedPointer(&lsda, lpStartEncoding); 
-    }
-
-    uint8_t ttypeEncoding = *lsda++;
-    uintptr_t classInfoOffset;
-
-    if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) {
-        // Calculate type info locations in emitted dwarf code which
-        // were flagged by type info arguments to llvm.eh.selector
-        // intrinsic
-        classInfoOffset = readULEB128(&lsda);
-        classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset);
-    }
-
-    // Walk call-site table looking for range that 
-    // includes current PC. 
-
-    uint8_t         callSiteEncoding = *lsda++;
-    uint32_t        callSiteTableLength = readULEB128(&lsda);
-    const uint8_t*  callSiteTableStart = lsda;
-    const uint8_t*  callSiteTableEnd = callSiteTableStart + 
-                                                    callSiteTableLength;
-    const uint8_t*  actionTableStart = callSiteTableEnd;
-    const uint8_t*  callSitePtr = callSiteTableStart;
-
-    bool foreignException = false;
-
-    while (callSitePtr < callSiteTableEnd) {
-        uintptr_t start = readEncodedPointer(&callSitePtr, 
-                                             callSiteEncoding);
-        uintptr_t length = readEncodedPointer(&callSitePtr, 
+  
+  // Get the current instruction pointer and offset it before next
+  // instruction in the current frame which threw the exception.
+  uintptr_t pc = _Unwind_GetIP(context)-1;
+  
+  // Get beginning current frame's code (as defined by the 
+  // emitted dwarf code)
+  uintptr_t funcStart = _Unwind_GetRegionStart(context);
+  uintptr_t pcOffset = pc - funcStart;
+  struct OurExceptionType_t **classInfo = NULL;
+  
+  // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding
+  //       dwarf emission
+  
+  // Parse LSDA header.
+  uint8_t lpStartEncoding = *lsda++;
+  
+  if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) {
+    readEncodedPointer(&lsda, lpStartEncoding); 
+  }
+  
+  uint8_t ttypeEncoding = *lsda++;
+  uintptr_t classInfoOffset;
+  
+  if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) {
+    // Calculate type info locations in emitted dwarf code which
+    // were flagged by type info arguments to llvm.eh.selector
+    // intrinsic
+    classInfoOffset = readULEB128(&lsda);
+    classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset);
+  }
+  
+  // Walk call-site table looking for range that 
+  // includes current PC. 
+  
+  uint8_t         callSiteEncoding = *lsda++;
+  uint32_t        callSiteTableLength = readULEB128(&lsda);
+  const uint8_t   *callSiteTableStart = lsda;
+  const uint8_t   *callSiteTableEnd = callSiteTableStart + 
+  callSiteTableLength;
+  const uint8_t   *actionTableStart = callSiteTableEnd;
+  const uint8_t   *callSitePtr = callSiteTableStart;
+  
+  bool foreignException = false;
+  
+  while (callSitePtr < callSiteTableEnd) {
+    uintptr_t start = readEncodedPointer(&callSitePtr, 
+                                         callSiteEncoding);
+    uintptr_t length = readEncodedPointer(&callSitePtr, 
+                                          callSiteEncoding);
+    uintptr_t landingPad = readEncodedPointer(&callSitePtr, 
                                               callSiteEncoding);
-        uintptr_t landingPad = readEncodedPointer(&callSitePtr, 
-                                                  callSiteEncoding);
-
-        // Note: Action value
-        uintptr_t actionEntry = readULEB128(&callSitePtr);
-
-        if (exceptionClass != ourBaseExceptionClass) {
-            // We have been notified of a foreign exception being thrown,
-            // and we therefore need to execute cleanup landing pads
-            actionEntry = 0;
-            foreignException = true;
-        }
-
-        if (landingPad == 0) {
+    
+    // Note: Action value
+    uintptr_t actionEntry = readULEB128(&callSitePtr);
+    
+    if (exceptionClass != ourBaseExceptionClass) {
+      // We have been notified of a foreign exception being thrown,
+      // and we therefore need to execute cleanup landing pads
+      actionEntry = 0;
+      foreignException = true;
+    }
+    
+    if (landingPad == 0) {
 #ifdef DEBUG
-            fprintf(stderr,
-                    "handleLsda(...): No landing pad found.\n");
+      fprintf(stderr,
+              "handleLsda(...): No landing pad found.\n");
 #endif
-
-            continue; // no landing pad for this entry
-        }
-
-        if (actionEntry) {
-            actionEntry += ((uintptr_t) actionTableStart) - 1;
-        }
-        else {
+      
+      continue; // no landing pad for this entry
+    }
+    
+    if (actionEntry) {
+      actionEntry += ((uintptr_t) actionTableStart) - 1;
+    }
+    else {
 #ifdef DEBUG
-            fprintf(stderr,
-                    "handleLsda(...):No action table found.\n");
+      fprintf(stderr,
+              "handleLsda(...):No action table found.\n");
 #endif
-        }
-
-        bool exceptionMatched = false;
-
-        if ((start <= pcOffset) && (pcOffset < (start + length))) {
+    }
+    
+    bool exceptionMatched = false;
+    
+    if ((start <= pcOffset) && (pcOffset < (start + length))) {
 #ifdef DEBUG
-            fprintf(stderr,
-                    "handleLsda(...): Landing pad found.\n");
+      fprintf(stderr,
+              "handleLsda(...): Landing pad found.\n");
 #endif
-            int64_t actionValue = 0;
-
-            if (actionEntry) {
-                exceptionMatched = handleActionValue
-                                   (
-                                       &actionValue,
-                                       classInfo, 
-                                       actionEntry, 
-                                       exceptionClass, 
-                                       exceptionObject
-                                   );
-            }
-
-            if (!(actions & _UA_SEARCH_PHASE)) {
+      int64_t actionValue = 0;
+      
+      if (actionEntry) {
+        exceptionMatched = handleActionValue(&actionValue,
+                                             classInfo, 
+                                             actionEntry, 
+                                             exceptionClass, 
+                                             exceptionObject);
+      }
+      
+      if (!(actions & _UA_SEARCH_PHASE)) {
 #ifdef DEBUG
-                fprintf(stderr,
-                        "handleLsda(...): installed landing pad "
-                            "context.\n");
+        fprintf(stderr,
+                "handleLsda(...): installed landing pad "
+                "context.\n");
 #endif
-
-                // Found landing pad for the PC.
-                // Set Instruction Pointer to so we re-enter function 
-                // at landing pad. The landing pad is created by the 
-                // compiler to take two parameters in registers.
-                _Unwind_SetGR(context, 
-                              __builtin_eh_return_data_regno(0), 
-                              (uintptr_t)exceptionObject);
-
-                // Note: this virtual register directly corresponds
-                //       to the return of the llvm.eh.selector intrinsic
-                if (!actionEntry || !exceptionMatched) {
-                    // We indicate cleanup only
-                    _Unwind_SetGR(context, 
-                                  __builtin_eh_return_data_regno(1), 
-                                  0);
-                }
-                else {
-                    // Matched type info index of llvm.eh.selector intrinsic
-                    // passed here.
-                    _Unwind_SetGR(context, 
-                                  __builtin_eh_return_data_regno(1), 
-                                  actionValue);
-                }
-
-                // To execute landing pad set here
-                _Unwind_SetIP(context, funcStart + landingPad);
-                ret = _URC_INSTALL_CONTEXT;
-            }
-            else if (exceptionMatched) {
+        
+        // Found landing pad for the PC.
+        // Set Instruction Pointer to so we re-enter function 
+        // at landing pad. The landing pad is created by the 
+        // compiler to take two parameters in registers.
+        _Unwind_SetGR(context, 
+                      __builtin_eh_return_data_regno(0), 
+                      (uintptr_t)exceptionObject);
+        
+        // Note: this virtual register directly corresponds
+        //       to the return of the llvm.eh.selector intrinsic
+        if (!actionEntry || !exceptionMatched) {
+          // We indicate cleanup only
+          _Unwind_SetGR(context, 
+                        __builtin_eh_return_data_regno(1), 
+                        0);
+        }
+        else {
+          // Matched type info index of llvm.eh.selector intrinsic
+          // passed here.
+          _Unwind_SetGR(context, 
+                        __builtin_eh_return_data_regno(1), 
+                        actionValue);
+        }
+        
+        // To execute landing pad set here
+        _Unwind_SetIP(context, funcStart + landingPad);
+        ret = _URC_INSTALL_CONTEXT;
+      }
+      else if (exceptionMatched) {
 #ifdef DEBUG
-                fprintf(stderr,
-                        "handleLsda(...): setting handler found.\n");
+        fprintf(stderr,
+                "handleLsda(...): setting handler found.\n");
 #endif
-                ret = _URC_HANDLER_FOUND;
-            }
-            else {
-                // Note: Only non-clean up handlers are marked as
-                //       found. Otherwise the clean up handlers will be 
-                //       re-found and executed during the clean up 
-                //       phase.
+        ret = _URC_HANDLER_FOUND;
+      }
+      else {
+        // Note: Only non-clean up handlers are marked as
+        //       found. Otherwise the clean up handlers will be 
+        //       re-found and executed during the clean up 
+        //       phase.
 #ifdef DEBUG
-                fprintf(stderr,
-                        "handleLsda(...): cleanup handler found.\n");
+        fprintf(stderr,
+                "handleLsda(...): cleanup handler found.\n");
 #endif
-            }
-
-            break;
-        }
+      }
+      
+      break;
     }
-
-    return(ret);
+  }
+  
+  return(ret);
 }
 
 
@@ -796,38 +794,38 @@ static _Unwind_Reason_Code handleLsda(int version,
 /// @param context unwind system context
 /// @returns minimally supported unwinding control indicator 
 _Unwind_Reason_Code ourPersonality(int version, 
-                               _Unwind_Action actions,
-                               uint64_t exceptionClass, 
-                               struct _Unwind_Exception* exceptionObject,
-                               _Unwind_Context_t context) {
+                                   _Unwind_Action actions,
+                                   uint64_t exceptionClass, 
+                                   struct _Unwind_Exception *exceptionObject,
+                                   _Unwind_Context_t context) {
 #ifdef DEBUG
-    fprintf(stderr, 
-            "We are in ourPersonality(...):actions is <%d>.\n",
-            actions);
-
-    if (actions & _UA_SEARCH_PHASE) {
-        fprintf(stderr, "ourPersonality(...):In search phase.\n");
-    }
-    else {
-        fprintf(stderr, "ourPersonality(...):In non-search phase.\n");
-    }
+  fprintf(stderr, 
+          "We are in ourPersonality(...):actions is <%d>.\n",
+          actions);
+  
+  if (actions & _UA_SEARCH_PHASE) {
+    fprintf(stderr, "ourPersonality(...):In search phase.\n");
+  }
+  else {
+    fprintf(stderr, "ourPersonality(...):In non-search phase.\n");
+  }
 #endif
-
-    const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context);
-
+  
+  const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context);
+  
 #ifdef DEBUG
-    fprintf(stderr, 
-            "ourPersonality(...):lsda = <%p>.\n",
-            lsda);
+  fprintf(stderr, 
+          "ourPersonality(...):lsda = <%p>.\n",
+          lsda);
 #endif
-
-    // The real work of the personality function is captured here
-    return(handleLsda(version,
-                      lsda,
-                      actions,
-                      exceptionClass,
-                      exceptionObject,
-                      context));
+  
+  // The real work of the personality function is captured here
+  return(handleLsda(version,
+                    lsda,
+                    actions,
+                    exceptionClass,
+                    exceptionObject,
+                    context));
 }
 
 
@@ -840,14 +838,14 @@ _Unwind_Reason_Code ourPersonality(int version,
 /// @returns class value
 uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
 {
-    uint64_t ret = classChars[0];
-
-    for (unsigned i = 1; i < classCharsSize; ++i) {
-        ret <<= 8;
-        ret += classChars[i];
-    }
-
-    return(ret);
+  uint64_t ret = classChars[0];
+  
+  for (unsigned i = 1; i < classCharsSize; ++i) {
+    ret <<= 8;
+    ret += classChars[i];
+  }
+  
+  return(ret);
 }
 
 } // extern "C"
@@ -869,36 +867,36 @@ uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
 ///        generated, and is used to hold the constant string. A value of 
 ///        false indicates that the constant string will be stored on the 
 ///        stack.
-void generateStringPrint(llvm::LLVMContext& context, 
-                         llvm::Module& module,
-                         llvm::IRBuilder<>& builder, 
+void generateStringPrint(llvm::LLVMContext &context, 
+                         llvm::Module &module,
+                         llvm::IRBuilder<> &builder, 
                          std::string toPrint,
                          bool useGlobal = true) {
-    llvm::Function *printFunct = module.getFunction("printStr");
-
-    llvm::Value *stringVar;
-    llvm::Constant* stringConstant = 
-        llvm::ConstantArray::get(context, toPrint);
-
-    if (useGlobal) {
-        // Note: Does not work without allocation
-        stringVar = 
-            new llvm::GlobalVariable(module, 
-                                     stringConstant->getType(),
-                                     true, 
-                                     llvm::GlobalValue::LinkerPrivateLinkage, 
-                                     stringConstant, 
-                                     "");
-    }
-    else {
-        stringVar = builder.CreateAlloca(stringConstant->getType());
-        builder.CreateStore(stringConstant, stringVar);
-    }
-
-    llvm::Value* cast = 
-        builder.CreatePointerCast(stringVar, 
-                                  builder.getInt8Ty()->getPointerTo());
-    builder.CreateCall(printFunct, cast);
+  llvm::Function *printFunct = module.getFunction("printStr");
+  
+  llvm::Value *stringVar;
+  llvm::Constant *stringConstant = 
+  llvm::ConstantArray::get(context, toPrint);
+  
+  if (useGlobal) {
+    // Note: Does not work without allocation
+    stringVar = 
+    new llvm::GlobalVariable(module, 
+                             stringConstant->getType(),
+                             true, 
+                             llvm::GlobalValue::LinkerPrivateLinkage, 
+                             stringConstant, 
+                             "");
+  }
+  else {
+    stringVar = builder.CreateAlloca(stringConstant->getType());
+    builder.CreateStore(stringConstant, stringVar);
+  }
+  
+  llvm::Value *cast = 
+  builder.CreatePointerCast(stringVar, 
+                            builder.getInt8Ty()->getPointerTo());
+  builder.CreateCall(printFunct, cast);
 }
 
 
@@ -914,35 +912,35 @@ void generateStringPrint(llvm::LLVMContext& context,
 ///        generated, and is used to hold the constant string. A value of 
 ///        false indicates that the constant string will be stored on the 
 ///        stack.
-void generateIntegerPrint(llvm::LLVMContext& context, 
-                          llvm::Module& module,
-                          llvm::IRBuilder<>& builder, 
-                          llvm::Function& printFunct,
-                          llvm::Value& toPrint,
+void generateIntegerPrint(llvm::LLVMContext &context, 
+                          llvm::Module &module,
+                          llvm::IRBuilder<> &builder, 
+                          llvm::Function &printFunct,
+                          llvm::Value &toPrint,
                           std::string format, 
                           bool useGlobal = true) {
-    llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format);
-    llvm::Value *stringVar;
-
-    if (useGlobal) {
-        // Note: Does not seem to work without allocation
-        stringVar = 
-            new llvm::GlobalVariable(module, 
-                                     stringConstant->getType(),
-                                    true, 
-                                     llvm::GlobalValue::LinkerPrivateLinkage, 
-                                     stringConstant, 
-                                     "");
-    }
-    else {
-        stringVar = builder.CreateAlloca(stringConstant->getType());
-        builder.CreateStore(stringConstant, stringVar);
-    }
-
-    llvm::Value* cast = 
-        builder.CreateBitCast(stringVar, 
-                              builder.getInt8Ty()->getPointerTo());
-    builder.CreateCall2(&printFunct, &toPrint, cast);
+  llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format);
+  llvm::Value *stringVar;
+  
+  if (useGlobal) {
+    // Note: Does not seem to work without allocation
+    stringVar = 
+    new llvm::GlobalVariable(module, 
+                             stringConstant->getType(),
+                             true, 
+                             llvm::GlobalValue::LinkerPrivateLinkage, 
+                             stringConstant, 
+                             "");
+  }
+  else {
+    stringVar = builder.CreateAlloca(stringConstant->getType());
+    builder.CreateStore(stringConstant, stringVar);
+  }
+  
+  llvm::Value *cast = 
+  builder.CreateBitCast(stringVar, 
+                        builder.getInt8Ty()->getPointerTo());
+  builder.CreateCall2(&printFunct, &toPrint, cast);
 }
 
 
@@ -965,64 +963,61 @@ void generateIntegerPrint(llvm::LLVMContext& context,
 /// @param exceptionCaughtFlag reference exception caught/thrown status storage
 /// @param exceptionStorage reference to exception pointer storage
 /// @returns newly created block
-static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context, 
-                             llvm::Module& module, 
-                             llvm::IRBuilder<>& builder, 
-                             llvm::Function& toAddTo,
-                             std::string& blockName,
-                             std::string& functionId,
-                             llvm::BasicBlock& terminatorBlock,
-                             llvm::BasicBlock& unwindResumeBlock,
-                             llvm::Value** exceptionCaughtFlag,
-                             llvm::Value** exceptionStorage) {
-    assert(exceptionCaughtFlag && 
-           "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag "
-               "is NULL");
-    assert(exceptionStorage && 
-           "ExceptionDemo::createFinallyBlock(...):exceptionStorage "
-               "is NULL");
-
-    *exceptionCaughtFlag = 
-        createEntryBlockAlloca(toAddTo,
-                               "exceptionCaught",
-                               ourExceptionNotThrownState->getType(),
-                               ourExceptionNotThrownState);
-
-    const llvm::PointerType* exceptionStorageType = 
-                                builder.getInt8Ty()->getPointerTo();
-    *exceptionStorage = 
-        createEntryBlockAlloca(toAddTo,
-                               "exceptionStorage",
-                               exceptionStorageType,
-                               llvm::ConstantPointerNull::get(
-                                   exceptionStorageType));
-
-    llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
-                                                     blockName,
-                                                     &toAddTo);
-
-    builder.SetInsertPoint(ret);
-   
-    std::ostringstream bufferToPrint;
-    bufferToPrint << "Gen: Executing finally block "
-                  << blockName
-                  << " in "
-                  << functionId
-                  << std::endl;
-    generateStringPrint(context, 
-                        module, 
-                        builder, 
-                        bufferToPrint.str(),
-                        USE_GLOBAL_STR_CONSTS);
-
-    llvm::SwitchInst* theSwitch = 
-        builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), 
-                             &terminatorBlock,
-                             2);
-    theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock);
-    theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock);
-
-    return(ret);
+static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context, 
+                                            llvm::Module &module, 
+                                            llvm::IRBuilder<> &builder, 
+                                            llvm::Function &toAddTo,
+                                            std::string &blockName,
+                                            std::string &functionId,
+                                            llvm::BasicBlock &terminatorBlock,
+                                            llvm::BasicBlock &unwindResumeBlock,
+                                            llvm::Value **exceptionCaughtFlag,
+                                            llvm::Value **exceptionStorage) {
+  assert(exceptionCaughtFlag && 
+         "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag "
+         "is NULL");
+  assert(exceptionStorage && 
+         "ExceptionDemo::createFinallyBlock(...):exceptionStorage "
+         "is NULL");
+  
+  *exceptionCaughtFlag = 
+  createEntryBlockAlloca(toAddTo,
+                         "exceptionCaught",
+                         ourExceptionNotThrownState->getType(),
+                         ourExceptionNotThrownState);
+  
+  const llvm::PointerType *exceptionStorageType = 
+  builder.getInt8Ty()->getPointerTo();
+  *exceptionStorage = 
+  createEntryBlockAlloca(toAddTo,
+                         "exceptionStorage",
+                         exceptionStorageType,
+                         llvm::ConstantPointerNull::get(
+                                                        exceptionStorageType));
+  
+  llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
+                                                   blockName,
+                                                   &toAddTo);
+  
+  builder.SetInsertPoint(ret);
+  
+  std::ostringstream bufferToPrint;
+  bufferToPrint << "Gen: Executing finally block "
+    << blockName << " in " << functionId << "\n";
+  generateStringPrint(context, 
+                      module, 
+                      builder, 
+                      bufferToPrint.str(),
+                      USE_GLOBAL_STR_CONSTS);
+  
+  llvm::SwitchInst *theSwitch = 
+  builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), 
+                       &terminatorBlock,
+                       2);
+  theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock);
+  theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock);
+  
+  return(ret);
 }
 
 
@@ -1038,36 +1033,36 @@ static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context,
 /// @param terminatorBlock terminator "end" block
 /// @param exceptionCaughtFlag exception caught/thrown status
 /// @returns newly created block
-static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context, 
-                                          llvm::Module& module, 
-                                          llvm::IRBuilder<>& builder, 
-                                          llvm::Function& toAddTo,
-                                          std::string& blockName,
-                                          std::string& functionId,
-                                          llvm::BasicBlock& terminatorBlock,
-                                          llvm::Value& exceptionCaughtFlag) {
-
-    llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
-                                                     blockName,
-                                                     &toAddTo);
-
-    builder.SetInsertPoint(ret);
-
-    std::ostringstream bufferToPrint;
-    bufferToPrint << "Gen: Executing catch block "
-                  << blockName
-                  << " in "
-                  << functionId
-                  << std::endl;
-    generateStringPrint(context, 
-                        module, 
-                        builder, 
-                        bufferToPrint.str(),
-                        USE_GLOBAL_STR_CONSTS);
-    builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag);
-    builder.CreateBr(&terminatorBlock);
-
-    return(ret);
+static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context, 
+                                          llvm::Module &module, 
+                                          llvm::IRBuilder<> &builder, 
+                                          llvm::Function &toAddTo,
+                                          std::string &blockName,
+                                          std::string &functionId,
+                                          llvm::BasicBlock &terminatorBlock,
+                                          llvm::Value &exceptionCaughtFlag) {
+  
+  llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
+                                                   blockName,
+                                                   &toAddTo);
+  
+  builder.SetInsertPoint(ret);
+  
+  std::ostringstream bufferToPrint;
+  bufferToPrint << "Gen: Executing catch block "
+  << blockName
+  << " in "
+  << functionId
+  << std::endl;
+  generateStringPrint(context, 
+                      module, 
+                      builder, 
+                      bufferToPrint.str(),
+                      USE_GLOBAL_STR_CONSTS);
+  builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag);
+  builder.CreateBr(&terminatorBlock);
+  
+  return(ret);
 }
 
 
@@ -1091,275 +1086,269 @@ static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context,
 /// @param exceptionTypesToCatch array of type info types to "catch"
 /// @returns generated function
 static
-llvm::Function* createCatchWrappedInvokeFunction(llvm::Module& module, 
-                    llvm::IRBuilder<>& builder, 
-                    llvm::FunctionPassManager& fpm,
-                    llvm::Function& toInvoke,
-                    std::string ourId,
-                    unsigned numExceptionsToCatch,
-                    unsigned exceptionTypesToCatch[]) {
-
-    llvm::LLVMContext& context = module.getContext();
-    llvm::Function *toPrint32Int = module.getFunction("print32Int");
-
-    ArgTypes argTypes;
-    argTypes.push_back(builder.getInt32Ty());
-
-    ArgNames argNames;
-    argNames.push_back("exceptTypeToThrow");
-
-    llvm::Function* ret = createFunction(module, 
-                                         builder.getVoidTy(),
-                                         argTypes, 
-                                         argNames, 
-                                         ourId,
-                                         llvm::Function::ExternalLinkage, 
-                                         false, 
-                                         false);
-
-    // Block which calls invoke
-    llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
-                                                            "entry", 
-                                                            ret);
-    // Normal block for invoke
-    llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, 
-                                                             "normal", 
-                                                             ret);
-    // Unwind block for invoke
-    llvm::BasicBlock *exceptionBlock = 
-        llvm::BasicBlock::Create(context, "exception", ret);
-
-    // Block which routes exception to correct catch handler block
-    llvm::BasicBlock *exceptionRouteBlock = 
-        llvm::BasicBlock::Create(context, "exceptionRoute", ret);
-
-    // Foreign exception handler
-    llvm::BasicBlock *externalExceptionBlock = 
-        llvm::BasicBlock::Create(context, "externalException", ret);
-
-    // Block which calls _Unwind_Resume
-    llvm::BasicBlock *unwindResumeBlock = 
-        llvm::BasicBlock::Create(context, "unwindResume", ret);
-
-    // Clean up block which delete exception if needed
-    llvm::BasicBlock *endBlock = 
-        llvm::BasicBlock::Create(context, "end", ret);
-
-    std::string nextName;
-    std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
-    llvm::Value* exceptionCaughtFlag = NULL;
-    llvm::Value* exceptionStorage = NULL;
-
-    // Finally block which will branch to unwindResumeBlock if 
-    // exception is not caught. Initializes/allocates stack locations.
-    llvm::BasicBlock* finallyBlock = createFinallyBlock(context, 
-                                                        module, 
-                                                        builder, 
-                                                        *ret, 
-                                                        nextName = "finally", 
-                                                        ourId,
-                                                        *endBlock,
-                                                        *unwindResumeBlock,
-                                                        &exceptionCaughtFlag,
-                                                        &exceptionStorage);
-
-    for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
-        nextName = ourTypeInfoNames[exceptionTypesToCatch[i]];
-
-        // One catch block per type info to be caught
-        catchBlocks[i] = createCatchBlock(context, 
-                                          module, 
-                                          builder, 
-                                          *ret,
-                                          nextName, 
-                                          ourId,
-                                          *finallyBlock,
-                                          *exceptionCaughtFlag);
-    }
-
-    // Entry Block
-
-    builder.SetInsertPoint(entryBlock);
-
-    std::vector<llvm::Value*> args;
-    args.push_back(namedValues["exceptTypeToThrow"]);
-    builder.CreateInvoke(&toInvoke, 
-                         normalBlock, 
-                         exceptionBlock, 
-                         args.begin(), 
-                         args.end());
-
-    // End Block
-
-    builder.SetInsertPoint(endBlock);
-
-    generateStringPrint(context, 
-                        module,
-                        builder, 
-                        "Gen: In end block: exiting in " + ourId + ".\n",
-                        USE_GLOBAL_STR_CONSTS);
-    llvm::Function *deleteOurException = 
-                                    module.getFunction("deleteOurException");
-
-    // Note: function handles NULL exceptions
-    builder.CreateCall(deleteOurException, 
-                       builder.CreateLoad(exceptionStorage));
-    builder.CreateRetVoid();
-
-    // Normal Block
-
-    builder.SetInsertPoint(normalBlock);
-
-    generateStringPrint(context, 
-                        module,
-                        builder, 
-                        "Gen: No exception in " + ourId + "!\n",
-                        USE_GLOBAL_STR_CONSTS);
-
-    // Finally block is always called
-    builder.CreateBr(finallyBlock);
-
-    // Unwind Resume Block
-
-    builder.SetInsertPoint(unwindResumeBlock);
-
-    llvm::Function *resumeOurException = 
-                module.getFunction("_Unwind_Resume");
-    builder.CreateCall(resumeOurException, 
-                       builder.CreateLoad(exceptionStorage));
-    builder.CreateUnreachable();
-
-    // Exception Block
-
-    builder.SetInsertPoint(exceptionBlock);
-
-    llvm::Function *ehException = module.getFunction("llvm.eh.exception");
-
-    // Retrieve thrown exception
-    llvm::Value* unwindException = builder.CreateCall(ehException);
-
-    // Store exception and flag
-    builder.CreateStore(unwindException, exceptionStorage);
-    builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
-    llvm::Function *personality = module.getFunction("ourPersonality");
-    llvm::Value* functPtr = 
-        builder.CreatePointerCast(personality, 
-                              builder.getInt8Ty()->getPointerTo());
-
-    args.clear();
-    args.push_back(unwindException);
-    args.push_back(functPtr);
-
-    // Note: Skipping index 0
-    for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
-        // Set up type infos to be caught
-        args.push_back(
-            module.getGlobalVariable(
-                ourTypeInfoNames[exceptionTypesToCatch[i]]));
-    }
-
-    args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0));
-
-    llvm::Function *ehSelector = module.getFunction("llvm.eh.selector");
-
-    // Set up this exeption block as the landing pad which will handle
-    // given type infos. See case Intrinsic::eh_selector in 
-    // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...)
-    // implemented in FunctionLoweringInfo.cpp to see how the implementation
-    // handles this call. This landing pad (this exception block), will be 
-    // called either because it nees to cleanup (call finally) or a type 
-    // info was found which matched the thrown exception.
-    llvm::Value* retTypeInfoIndex = builder.CreateCall(ehSelector, 
-                                                       args.begin(), 
-                                                       args.end());
-
-    // Retrieve exception_class member from thrown exception 
-    // (_Unwind_Exception instance). This member tells us whether or not
-    // the exception is foreign.
-    llvm::Value* unwindExceptionClass = 
-        builder.CreateLoad(
-            builder.CreateStructGEP(
-                builder.CreatePointerCast(
-                    unwindException, 
-                    ourUnwindExceptionType->getPointerTo()), 
-                0));
-
-    // Branch to the externalExceptionBlock if the exception is foreign or
-    // to a catch router if not. Either way the finally block will be run.
-    builder.CreateCondBr(
-        builder.CreateICmpEQ(unwindExceptionClass,
-                             llvm::ConstantInt::get(builder.getInt64Ty(), 
-                                                    ourBaseExceptionClass)),
-        exceptionRouteBlock,
-        externalExceptionBlock);
-
-    // External Exception Block
-
-    builder.SetInsertPoint(externalExceptionBlock);
-
-    generateStringPrint(context, 
-                        module,
-                        builder, 
-                        "Gen: Foreign exception received.\n",
-                        USE_GLOBAL_STR_CONSTS);
-
-    // Branch to the finally block
-    builder.CreateBr(finallyBlock);
-
-    // Exception Route Block
-
-    builder.SetInsertPoint(exceptionRouteBlock);
-
-    // Casts exception pointer (_Unwind_Exception instance) to parent 
-    // (OurException instance).
-    //
-    // Note: ourBaseFromUnwindOffset is usually negative
-    llvm::Value* typeInfoThrown = 
-        builder.CreatePointerCast(
-            builder.CreateConstGEP1_64(unwindException,
-                                       ourBaseFromUnwindOffset),
-            ourExceptionType->getPointerTo());
-
-    // Retrieve thrown exception type info type
-    //
-    // Note: Index is not relative to pointer but instead to structure
-    //       unlike a true getelementptr (GEP) instruction
-    typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0);
-
-    llvm::Value* typeInfoThrownType = 
-                     builder.CreateStructGEP(typeInfoThrown, 0);
-
-    generateIntegerPrint(context, 
-                         module,
-                         builder, 
-                         *toPrint32Int, 
-                         *(builder.CreateLoad(typeInfoThrownType)),
-                         "Gen: Exception type <%d> received (stack unwound) " 
-                                 " in " + 
-                             ourId + 
-                             ".\n",
-                         USE_GLOBAL_STR_CONSTS);
-
-    // Route to matched type info catch block or run cleanup finally block
-    llvm::SwitchInst* switchToCatchBlock = 
-        builder.CreateSwitch(retTypeInfoIndex, 
-                             finallyBlock, 
-                             numExceptionsToCatch);
-
-    unsigned nextTypeToCatch;
-
-    for (unsigned i = 1; i <= numExceptionsToCatch; ++i) {
-        nextTypeToCatch = i - 1;
-        switchToCatchBlock->addCase(llvm::ConstantInt::get(
-                                        llvm::Type::getInt32Ty(context), 
-                                        i),
-                                    catchBlocks[nextTypeToCatch]);
-    }
-
-    llvm::verifyFunction(*ret);
-    fpm.run(*ret);
-
-    return(ret);
+llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module, 
+                                             llvm::IRBuilder<> &builder, 
+                                             llvm::FunctionPassManager &fpm,
+                                             llvm::Function &toInvoke,
+                                             std::string ourId,
+                                             unsigned numExceptionsToCatch,
+                                             unsigned exceptionTypesToCatch[]) {
+  
+  llvm::LLVMContext &context = module.getContext();
+  llvm::Function *toPrint32Int = module.getFunction("print32Int");
+  
+  ArgTypes argTypes;
+  argTypes.push_back(builder.getInt32Ty());
+  
+  ArgNames argNames;
+  argNames.push_back("exceptTypeToThrow");
+  
+  llvm::Function *ret = createFunction(module, 
+                                       builder.getVoidTy(),
+                                       argTypes, 
+                                       argNames, 
+                                       ourId,
+                                       llvm::Function::ExternalLinkage, 
+                                       false, 
+                                       false);
+  
+  // Block which calls invoke
+  llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
+                                                          "entry", 
+                                                          ret);
+  // Normal block for invoke
+  llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, 
+                                                           "normal", 
+                                                           ret);
+  // Unwind block for invoke
+  llvm::BasicBlock *exceptionBlock = 
+  llvm::BasicBlock::Create(context, "exception", ret);
+  
+  // Block which routes exception to correct catch handler block
+  llvm::BasicBlock *exceptionRouteBlock = 
+  llvm::BasicBlock::Create(context, "exceptionRoute", ret);
+  
+  // Foreign exception handler
+  llvm::BasicBlock *externalExceptionBlock = 
+  llvm::BasicBlock::Create(context, "externalException", ret);
+  
+  // Block which calls _Unwind_Resume
+  llvm::BasicBlock *unwindResumeBlock = 
+  llvm::BasicBlock::Create(context, "unwindResume", ret);
+  
+  // Clean up block which delete exception if needed
+  llvm::BasicBlock *endBlock = 
+  llvm::BasicBlock::Create(context, "end", ret);
+  
+  std::string nextName;
+  std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
+  llvm::Value *exceptionCaughtFlag = NULL;
+  llvm::Value *exceptionStorage = NULL;
+  
+  // Finally block which will branch to unwindResumeBlock if 
+  // exception is not caught. Initializes/allocates stack locations.
+  llvm::BasicBlock *finallyBlock = createFinallyBlock(context, 
+                                                      module, 
+                                                      builder, 
+                                                      *ret, 
+                                                      nextName = "finally", 
+                                                      ourId,
+                                                      *endBlock,
+                                                      *unwindResumeBlock,
+                                                      &exceptionCaughtFlag,
+                                                      &exceptionStorage);
+  
+  for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
+    nextName = ourTypeInfoNames[exceptionTypesToCatch[i]];
+    
+    // One catch block per type info to be caught
+    catchBlocks[i] = createCatchBlock(context, 
+                                      module, 
+                                      builder, 
+                                      *ret,
+                                      nextName, 
+                                      ourId,
+                                      *finallyBlock,
+                                      *exceptionCaughtFlag);
+  }
+  
+  // Entry Block
+  
+  builder.SetInsertPoint(entryBlock);
+  
+  std::vector<llvm::Value*> args;
+  args.push_back(namedValues["exceptTypeToThrow"]);
+  builder.CreateInvoke(&toInvoke, 
+                       normalBlock, 
+                       exceptionBlock, 
+                       args.begin(), 
+                       args.end());
+  
+  // End Block
+  
+  builder.SetInsertPoint(endBlock);
+  
+  generateStringPrint(context, 
+                      module,
+                      builder, 
+                      "Gen: In end block: exiting in " + ourId + ".\n",
+                      USE_GLOBAL_STR_CONSTS);
+  llvm::Function *deleteOurException = 
+  module.getFunction("deleteOurException");
+  
+  // Note: function handles NULL exceptions
+  builder.CreateCall(deleteOurException, 
+                     builder.CreateLoad(exceptionStorage));
+  builder.CreateRetVoid();
+  
+  // Normal Block
+  
+  builder.SetInsertPoint(normalBlock);
+  
+  generateStringPrint(context, 
+                      module,
+                      builder, 
+                      "Gen: No exception in " + ourId + "!\n",
+                      USE_GLOBAL_STR_CONSTS);
+  
+  // Finally block is always called
+  builder.CreateBr(finallyBlock);
+  
+  // Unwind Resume Block
+  
+  builder.SetInsertPoint(unwindResumeBlock);
+  
+  llvm::Function *resumeOurException = 
+  module.getFunction("_Unwind_Resume");
+  builder.CreateCall(resumeOurException, 
+                     builder.CreateLoad(exceptionStorage));
+  builder.CreateUnreachable();
+  
+  // Exception Block
+  
+  builder.SetInsertPoint(exceptionBlock);
+  
+  llvm::Function *ehException = module.getFunction("llvm.eh.exception");
+  
+  // Retrieve thrown exception
+  llvm::Value *unwindException = builder.CreateCall(ehException);
+  
+  // Store exception and flag
+  builder.CreateStore(unwindException, exceptionStorage);
+  builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
+  llvm::Function *personality = module.getFunction("ourPersonality");
+  llvm::Value *functPtr = 
+  builder.CreatePointerCast(personality, 
+                            builder.getInt8Ty()->getPointerTo());
+  
+  args.clear();
+  args.push_back(unwindException);
+  args.push_back(functPtr);
+  
+  // Note: Skipping index 0
+  for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
+    // Set up type infos to be caught
+    args.push_back(module.getGlobalVariable(
+                                  ourTypeInfoNames[exceptionTypesToCatch[i]]));
+  }
+  
+  args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0));
+  
+  llvm::Function *ehSelector = module.getFunction("llvm.eh.selector");
+  
+  // Set up this exeption block as the landing pad which will handle
+  // given type infos. See case Intrinsic::eh_selector in 
+  // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...)
+  // implemented in FunctionLoweringInfo.cpp to see how the implementation
+  // handles this call. This landing pad (this exception block), will be 
+  // called either because it nees to cleanup (call finally) or a type 
+  // info was found which matched the thrown exception.
+  llvm::Value *retTypeInfoIndex = builder.CreateCall(ehSelector, 
+                                                     args.begin(), 
+                                                     args.end());
+  
+  // Retrieve exception_class member from thrown exception 
+  // (_Unwind_Exception instance). This member tells us whether or not
+  // the exception is foreign.
+  llvm::Value *unwindExceptionClass = 
+    builder.CreateLoad(builder.CreateStructGEP(
+             builder.CreatePointerCast(unwindException, 
+                                       ourUnwindExceptionType->getPointerTo()), 
+                                               0));
+  
+  // Branch to the externalExceptionBlock if the exception is foreign or
+  // to a catch router if not. Either way the finally block will be run.
+  builder.CreateCondBr(builder.CreateICmpEQ(unwindExceptionClass,
+                            llvm::ConstantInt::get(builder.getInt64Ty(), 
+                                                   ourBaseExceptionClass)),
+                       exceptionRouteBlock,
+                       externalExceptionBlock);
+  
+  // External Exception Block
+  
+  builder.SetInsertPoint(externalExceptionBlock);
+  
+  generateStringPrint(context, 
+                      module,
+                      builder, 
+                      "Gen: Foreign exception received.\n",
+                      USE_GLOBAL_STR_CONSTS);
+  
+  // Branch to the finally block
+  builder.CreateBr(finallyBlock);
+  
+  // Exception Route Block
+  
+  builder.SetInsertPoint(exceptionRouteBlock);
+  
+  // Casts exception pointer (_Unwind_Exception instance) to parent 
+  // (OurException instance).
+  //
+  // Note: ourBaseFromUnwindOffset is usually negative
+  llvm::Value *typeInfoThrown = 
+  builder.CreatePointerCast(builder.CreateConstGEP1_64(unwindException,
+                                                       ourBaseFromUnwindOffset),
+                            ourExceptionType->getPointerTo());
+  
+  // Retrieve thrown exception type info type
+  //
+  // Note: Index is not relative to pointer but instead to structure
+  //       unlike a true getelementptr (GEP) instruction
+  typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0);
+  
+  llvm::Value *typeInfoThrownType = 
+  builder.CreateStructGEP(typeInfoThrown, 0);
+  
+  generateIntegerPrint(context, 
+                       module,
+                       builder, 
+                       *toPrint32Int, 
+                       *(builder.CreateLoad(typeInfoThrownType)),
+                       "Gen: Exception type <%d> received (stack unwound) " 
+                       " in " + 
+                       ourId + 
+                       ".\n",
+                       USE_GLOBAL_STR_CONSTS);
+  
+  // Route to matched type info catch block or run cleanup finally block
+  llvm::SwitchInst *switchToCatchBlock = 
+  builder.CreateSwitch(retTypeInfoIndex, 
+                       finallyBlock, 
+                       numExceptionsToCatch);
+  
+  unsigned nextTypeToCatch;
+  
+  for (unsigned i = 1; i <= numExceptionsToCatch; ++i) {
+    nextTypeToCatch = i - 1;
+    switchToCatchBlock->addCase(llvm::ConstantInt::get(
+                                   llvm::Type::getInt32Ty(context), i),
+                                catchBlocks[nextTypeToCatch]);
+  }
+  
+  llvm::verifyFunction(*ret);
+  fpm.run(*ret);
+  
+  return(ret);
 }
 
 
@@ -1378,107 +1367,107 @@ llvm::Function* createCatchWrappedInvokeFunction(llvm::Module& module,
 ///        if the above nativeThrowType matches generated function's arg.
 /// @returns generated function
 static
-llvm::Function* createThrowExceptionFunction(llvm::Module& module, 
-                                         llvm::IRBuilder<>& builder, 
-                                         llvm::FunctionPassManager& fpm,
-                                         std::string ourId,
-                                         int32_t nativeThrowType,
-                                         llvm::Function& nativeThrowFunct) {
-    llvm::LLVMContext& context = module.getContext();
-    namedValues.clear();
-    ArgTypes unwindArgTypes;
-    unwindArgTypes.push_back(builder.getInt32Ty());
-    ArgNames unwindArgNames;
-    unwindArgNames.push_back("exceptTypeToThrow");
-
-    llvm::Function *ret = createFunction(module,
-                                         builder.getVoidTy(),
-                                         unwindArgTypes,
-                                         unwindArgNames,
-                                         ourId,
-                                         llvm::Function::ExternalLinkage,
-                                         false,
-                                         false);
-
-    // Throws either one of our exception or a native C++ exception depending
-    // on a runtime argument value containing a type info type.
-    llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
-                                                            "entry", 
-                                                            ret);
-    // Throws a foreign exception
-    llvm::BasicBlock *nativeThrowBlock = 
-                                llvm::BasicBlock::Create(context,
-                                                         "nativeThrow", 
-                                                         ret);
-    // Throws one of our Exceptions
-    llvm::BasicBlock *generatedThrowBlock = 
-                                llvm::BasicBlock::Create(context,
-                                                         "generatedThrow", 
-                                                         ret);
-    // Retrieved runtime type info type to throw
-    llvm::Value* exceptionType = namedValues["exceptTypeToThrow"];
-
-    // nativeThrowBlock block
-
-    builder.SetInsertPoint(nativeThrowBlock);
-
-    // Throws foreign exception
-    builder.CreateCall(&nativeThrowFunct, exceptionType);
-    builder.CreateUnreachable();
-
-    // entry block
-
-    builder.SetInsertPoint(entryBlock);
-
-    llvm::Function *toPrint32Int = module.getFunction("print32Int");
-    generateIntegerPrint(context, 
-                         module,
-                         builder, 
-                         *toPrint32Int, 
-                         *exceptionType, 
-                         "\nGen: About to throw exception type <%d> in " + 
-                             ourId + 
-                             ".\n",
-                         USE_GLOBAL_STR_CONSTS);
-
-    // Switches on runtime type info type value to determine whether or not
-    // a foreign exception is thrown. Defaults to throwing one of our 
-    // generated exceptions.
-    llvm::SwitchInst* theSwitch = builder.CreateSwitch(exceptionType,
-                                                       generatedThrowBlock,
-                                                       1);
-
-    theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 
-                                              nativeThrowType),
-                       nativeThrowBlock);
-
-    // generatedThrow block
-
-    builder.SetInsertPoint(generatedThrowBlock);
-
-    llvm::Function *createOurException = 
-                module.getFunction("createOurException");
-    llvm::Function *raiseOurException = 
-                module.getFunction("_Unwind_RaiseException");
-
-    // Creates exception to throw with runtime type info type.
-    llvm::Value* exception = 
-        builder.CreateCall(createOurException, 
-                           namedValues["exceptTypeToThrow"]);
-
-    // Throw generated Exception
-    builder.CreateCall(raiseOurException, exception);
-    builder.CreateUnreachable();
-
-    llvm::verifyFunction(*ret);
-    fpm.run(*ret);
-
-    return(ret);
+llvm::Function *createThrowExceptionFunction(llvm::Module &module, 
+                                             llvm::IRBuilder<> &builder, 
+                                             llvm::FunctionPassManager &fpm,
+                                             std::string ourId,
+                                             int32_t nativeThrowType,
+                                             llvm::Function &nativeThrowFunct) {
+  llvm::LLVMContext &context = module.getContext();
+  namedValues.clear();
+  ArgTypes unwindArgTypes;
+  unwindArgTypes.push_back(builder.getInt32Ty());
+  ArgNames unwindArgNames;
+  unwindArgNames.push_back("exceptTypeToThrow");
+  
+  llvm::Function *ret = createFunction(module,
+                                       builder.getVoidTy(),
+                                       unwindArgTypes,
+                                       unwindArgNames,
+                                       ourId,
+                                       llvm::Function::ExternalLinkage,
+                                       false,
+                                       false);
+  
+  // Throws either one of our exception or a native C++ exception depending
+  // on a runtime argument value containing a type info type.
+  llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
+                                                          "entry", 
+                                                          ret);
+  // Throws a foreign exception
+  llvm::BasicBlock *nativeThrowBlock = 
+  llvm::BasicBlock::Create(context,
+                           "nativeThrow", 
+                           ret);
+  // Throws one of our Exceptions
+  llvm::BasicBlock *generatedThrowBlock = 
+  llvm::BasicBlock::Create(context,
+                           "generatedThrow", 
+                           ret);
+  // Retrieved runtime type info type to throw
+  llvm::Value *exceptionType = namedValues["exceptTypeToThrow"];
+  
+  // nativeThrowBlock block
+  
+  builder.SetInsertPoint(nativeThrowBlock);
+  
+  // Throws foreign exception
+  builder.CreateCall(&nativeThrowFunct, exceptionType);
+  builder.CreateUnreachable();
+  
+  // entry block
+  
+  builder.SetInsertPoint(entryBlock);
+  
+  llvm::Function *toPrint32Int = module.getFunction("print32Int");
+  generateIntegerPrint(context, 
+                       module,
+                       builder, 
+                       *toPrint32Int, 
+                       *exceptionType, 
+                       "\nGen: About to throw exception type <%d> in " + 
+                       ourId + 
+                       ".\n",
+                       USE_GLOBAL_STR_CONSTS);
+  
+  // Switches on runtime type info type value to determine whether or not
+  // a foreign exception is thrown. Defaults to throwing one of our 
+  // generated exceptions.
+  llvm::SwitchInst *theSwitch = builder.CreateSwitch(exceptionType,
+                                                     generatedThrowBlock,
+                                                     1);
+  
+  theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 
+                                            nativeThrowType),
+                     nativeThrowBlock);
+  
+  // generatedThrow block
+  
+  builder.SetInsertPoint(generatedThrowBlock);
+  
+  llvm::Function *createOurException = 
+  module.getFunction("createOurException");
+  llvm::Function *raiseOurException = 
+  module.getFunction("_Unwind_RaiseException");
+  
+  // Creates exception to throw with runtime type info type.
+  llvm::Value *exception = 
+  builder.CreateCall(createOurException, 
+                     namedValues["exceptTypeToThrow"]);
+  
+  // Throw generated Exception
+  builder.CreateCall(raiseOurException, exception);
+  builder.CreateUnreachable();
+  
+  llvm::verifyFunction(*ret);
+  fpm.run(*ret);
+  
+  return(ret);
 }
 
 static void createStandardUtilityFunctions(unsigned numTypeInfos,
-                                           llvm::Module& module, 
-                                           llvm::IRBuilder<>& builder);
+                                           llvm::Module &module, 
+                                           llvm::IRBuilder<> &builder);
 
 /// Creates test code by generating and organizing these functions into the 
 /// test case. The test case consists of an outer function setup to invoke
@@ -1500,81 +1489,80 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
 /// @param nativeThrowFunctName name of external function which will throw
 ///        a foreign exception
 /// @returns outermost generated test function.
-llvm::Function* createUnwindExceptionTest(llvm::Module& module, 
-                                          llvm::IRBuilder<>& builder, 
-                                          llvm::FunctionPassManager& fpm,
+llvm::Function *createUnwindExceptionTest(llvm::Module &module, 
+                                          llvm::IRBuilder<> &builder, 
+                                          llvm::FunctionPassManager &fpm,
                                           std::string nativeThrowFunctName) {
-    // Number of type infos to generate
-    unsigned numTypeInfos = 6;
-
-    // Initialze intrisics and external functions to use along with exception
-    // and type info globals.
-    createStandardUtilityFunctions(numTypeInfos,
-                                   module,
-                                   builder);
-    llvm::Function *nativeThrowFunct = 
-        module.getFunction(nativeThrowFunctName);
-
-    // Create exception throw function using the value ~0 to cause 
-    // foreign exceptions to be thrown.
-    llvm::Function* throwFunct = 
-                            createThrowExceptionFunction(module,
-                                                         builder,
-                                                         fpm,
-                                                         "throwFunct",
-                                                         ~0,
-                                                         *nativeThrowFunct);
-    // Inner function will catch even type infos
-    unsigned innerExceptionTypesToCatch[] = {6, 2, 4};
-    size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / 
-                                          sizeof(unsigned);
-
-    // Generate inner function.
-    llvm::Function* innerCatchFunct = 
-        createCatchWrappedInvokeFunction(module,
-                                         builder,
-                                         fpm,
-                                         *throwFunct,
-                                         "innerCatchFunct",
-                                         numExceptionTypesToCatch,
-                                         innerExceptionTypesToCatch);
-
-    // Outer function will catch odd type infos
-    unsigned outerExceptionTypesToCatch[] = {3, 1, 5};
-    numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / 
-                                   sizeof(unsigned);
-
-    // Generate outer function
-    llvm::Function* outerCatchFunct = 
-        createCatchWrappedInvokeFunction(module,
-                                         builder,
-                                         fpm,
-                                         *innerCatchFunct,
-                                         "outerCatchFunct",
-                                         numExceptionTypesToCatch,
-                                         outerExceptionTypesToCatch);
-
-    // Return outer function to run
-    return(outerCatchFunct);
+  // Number of type infos to generate
+  unsigned numTypeInfos = 6;
+  
+  // Initialze intrisics and external functions to use along with exception
+  // and type info globals.
+  createStandardUtilityFunctions(numTypeInfos,
+                                 module,
+                                 builder);
+  llvm::Function *nativeThrowFunct = 
+  module.getFunction(nativeThrowFunctName);
+  
+  // Create exception throw function using the value ~0 to cause 
+  // foreign exceptions to be thrown.
+  llvm::Function *throwFunct = 
+  createThrowExceptionFunction(module,
+                               builder,
+                               fpm,
+                               "throwFunct",
+                               ~0,
+                               *nativeThrowFunct);
+  // Inner function will catch even type infos
+  unsigned innerExceptionTypesToCatch[] = {6, 2, 4};
+  size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / 
+  sizeof(unsigned);
+  
+  // Generate inner function.
+  llvm::Function *innerCatchFunct = 
+  createCatchWrappedInvokeFunction(module,
+                                   builder,
+                                   fpm,
+                                   *throwFunct,
+                                   "innerCatchFunct",
+                                   numExceptionTypesToCatch,
+                                   innerExceptionTypesToCatch);
+  
+  // Outer function will catch odd type infos
+  unsigned outerExceptionTypesToCatch[] = {3, 1, 5};
+  numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / 
+  sizeof(unsigned);
+  
+  // Generate outer function
+  llvm::Function *outerCatchFunct = 
+  createCatchWrappedInvokeFunction(module,
+                                   builder,
+                                   fpm,
+                                   *innerCatchFunct,
+                                   "outerCatchFunct",
+                                   numExceptionTypesToCatch,
+                                   outerExceptionTypesToCatch);
+  
+  // Return outer function to run
+  return(outerCatchFunct);
 }
 
 
 /// Represents our foreign exceptions
 class OurCppRunException : public std::runtime_error {
 public:
-    OurCppRunException(const std::string reason) :
-        std::runtime_error(reason) {}
-
-    OurCppRunException (const OurCppRunException& toCopy) :
-        std::runtime_error(toCopy) {}
-
-    OurCppRunException& operator = (const OurCppRunException& toCopy) {
-        return(reinterpret_cast<OurCppRunException&>(
-                   std::runtime_error::operator = (toCopy)
-               ));
-    }
-
-    ~OurCppRunException (void) throw () {}
+  OurCppRunException(const std::string reason) :
+  std::runtime_error(reason) {}
+  
+  OurCppRunException (const OurCppRunException &toCopy) :
+  std::runtime_error(toCopy) {}
+  
+  OurCppRunException &operator = (const OurCppRunException &toCopy) {
+    return(reinterpret_cast<OurCppRunException&>(
+                                 std::runtime_error::operator=(toCopy)));
+  }
+  
+  ~OurCppRunException (void) throw () {}
 };
 
 
@@ -1583,13 +1571,13 @@ public:
 ///        generated function contract.
 extern "C"
 void throwCppException (int32_t ignoreIt) {
-    throw(OurCppRunException("thrown by throwCppException(...)"));
+  throw(OurCppRunException("thrown by throwCppException(...)"));
 }
 
 typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
 
 /// This is a test harness which runs test by executing generated 
-/// function with a type info type to throw. Harness wraps the excecution 
+/// function with a type info type to throw. Harness wraps the execution
 /// of generated function in a C++ try catch clause.
 /// @param engine execution engine to use for executing generated function.
 ///        This demo program expects this to be a JIT instance for demo
@@ -1598,45 +1586,44 @@ typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
 /// @param typeToThrow type info type of generated exception to throw, or
 ///        indicator to cause foreign exception to be thrown.
 static
-void runExceptionThrow(llvm::ExecutionEngine* engine, 
-                       llvm::Function* function, 
+void runExceptionThrow(llvm::ExecutionEngine *engine, 
+                       llvm::Function *function, 
                        int32_t typeToThrow) {
-
-    // Find test's function pointer
-    OurExceptionThrowFunctType functPtr = 
-          reinterpret_cast<OurExceptionThrowFunctType>(
-              reinterpret_cast<intptr_t>(
-                  engine->getPointerToFunction(function)
-              )
-          );
-
-    try {
-        // Run test
-        (*functPtr)(typeToThrow);
-    }
-    catch (OurCppRunException exc) {
-        // Catch foreign C++ exception
-        fprintf(stderr,
-                "\nrunExceptionThrow(...):In C++ catch OurCppRunException "
-                    "with reason: %s.\n", 
-                exc.what());
-    }
-    catch (...) {
-        // Catch all exceptions including our generated ones. I'm not sure
-        // why this latter functionality should work, as it seems that
-        // our exceptions should be foreign to C++ (the _Unwind_Exception::
-        // exception_class should be different from the one used by C++), and
-        // therefore C++ should ignore the generated exceptions. 
-
-        fprintf(stderr,
-                "\nrunExceptionThrow(...):In C++ catch all.\n");
-    }
+  
+  // Find test's function pointer
+  OurExceptionThrowFunctType functPtr = 
+    reinterpret_cast<OurExceptionThrowFunctType>(
+       reinterpret_cast<intptr_t>(engine->getPointerToFunction(function)));
+  
+  try {
+    // Run test
+    (*functPtr)(typeToThrow);
+  }
+  catch (OurCppRunException exc) {
+    // Catch foreign C++ exception
+    fprintf(stderr,
+            "\nrunExceptionThrow(...):In C++ catch OurCppRunException "
+            "with reason: %s.\n", 
+            exc.what());
+  }
+  catch (...) {
+    // Catch all exceptions including our generated ones. I'm not sure
+    // why this latter functionality should work, as it seems that
+    // our exceptions should be foreign to C++ (the _Unwind_Exception::
+    // exception_class should be different from the one used by C++), and
+    // therefore C++ should ignore the generated exceptions. 
+    
+    fprintf(stderr,
+            "\nrunExceptionThrow(...):In C++ catch all.\n");
+  }
 }
 
 //
 // End test functions
 //
 
+typedef llvm::ArrayRef<const llvm::Type*> TypeArray;
+
 /// This initialization routine creates type info globals and 
 /// adds external function declarations to module.
 /// @param numTypeInfos number of linear type info associated type info types
@@ -1644,287 +1631,285 @@ void runExceptionThrow(llvm::ExecutionEngine* engine,
 /// @param module code for module instance
 /// @param builder builder instance
 static void createStandardUtilityFunctions(unsigned numTypeInfos,
-                                           llvm::Module& module, 
-                                           llvm::IRBuilder<>& builder) {
-
-    llvm::LLVMContext& context = module.getContext();
-
-    // Exception initializations
-
-    // Setup exception catch state
-    ourExceptionNotThrownState = 
-                    llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0),
-    ourExceptionThrownState = 
-                    llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1),
-    ourExceptionCaughtState = 
-                    llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2),
-
-
-    // Create our type info type
-    ourTypeInfoType = llvm::StructType::get(context, 
-                                            builder.getInt32Ty(), 
-                                            NULL);
-
-    // Create OurException type
-    ourExceptionType = llvm::StructType::get(context, 
-                                             ourTypeInfoType,
-                                             NULL);
-
-    // Create portion of _Unwind_Exception type
-    //
-    // Note: Declaring only a portion of the _Unwind_Exception struct.
-    //       Does this cause problems?
-    ourUnwindExceptionType = llvm::StructType::get(context, 
-                                                   builder.getInt64Ty(),
-                                                   NULL);
-    struct OurBaseException_t dummyException;
-
-    // Calculate offset of OurException::unwindException member.
-    ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - 
-                           ((uintptr_t) &(dummyException.unwindException));
-
+                                           llvm::Module &module, 
+                                           llvm::IRBuilder<> &builder) {
+  
+  llvm::LLVMContext &context = module.getContext();
+  
+  // Exception initializations
+  
+  // Setup exception catch state
+  ourExceptionNotThrownState = 
+  llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0),
+  ourExceptionThrownState = 
+  llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1),
+  ourExceptionCaughtState = 
+  llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2),
+  
+  
+  
+  // Create our type info type
+  ourTypeInfoType = llvm::StructType::get(context, 
+                                          TypeArray(builder.getInt32Ty()));
+  
+  // Create OurException type
+  ourExceptionType = llvm::StructType::get(context, 
+                                           TypeArray(ourTypeInfoType));
+  
+  // Create portion of _Unwind_Exception type
+  //
+  // Note: Declaring only a portion of the _Unwind_Exception struct.
+  //       Does this cause problems?
+  ourUnwindExceptionType =
+    llvm::StructType::get(context, TypeArray(builder.getInt64Ty()));
+  struct OurBaseException_t dummyException;
+  
+  // Calculate offset of OurException::unwindException member.
+  ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - 
+  ((uintptr_t) &(dummyException.unwindException));
+  
 #ifdef DEBUG
-    fprintf(stderr,
-            "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset "
-                "= %lld, sizeof(struct OurBaseException_t) - "
-                "sizeof(struct _Unwind_Exception) = %lu.\n",
-            ourBaseFromUnwindOffset,
-            sizeof(struct OurBaseException_t) - 
-                sizeof(struct _Unwind_Exception));
+  fprintf(stderr,
+          "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset "
+          "= %lld, sizeof(struct OurBaseException_t) - "
+          "sizeof(struct _Unwind_Exception) = %lu.\n",
+          ourBaseFromUnwindOffset,
+          sizeof(struct OurBaseException_t) - 
+          sizeof(struct _Unwind_Exception));
 #endif
-
-    size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char);
-
-    // Create our _Unwind_Exception::exception_class value
-    ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars);
-
-    // Type infos
-
-    std::string baseStr = "typeInfo", typeInfoName;
-    std::ostringstream typeInfoNameBuilder;
-    std::vector<llvm::Constant*> structVals;
+  
+  size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char);
+  
+  // Create our _Unwind_Exception::exception_class value
+  ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars);
+  
+  // Type infos
+  
+  std::string baseStr = "typeInfo", typeInfoName;
+  std::ostringstream typeInfoNameBuilder;
+  std::vector<llvm::Constant*> structVals;
+  
+  llvm::Constant *nextStruct;
+  llvm::GlobalVariable *nextGlobal = NULL;
+  
+  // Generate each type info
+  //
+  // Note: First type info is not used.
+  for (unsigned i = 0; i <= numTypeInfos; ++i) {
+    structVals.clear();
+    structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i));
+    nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals);
     
-    llvm::Constant *nextStruct;
-    llvm::GlobalVariable* nextGlobal = NULL;
-
-    // Generate each type info
-    //
-    // Note: First type info is not used.
-    for (unsigned i = 0; i <= numTypeInfos; ++i) {
-        structVals.clear();
-        structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i));
-        nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals);
-
-        typeInfoNameBuilder.str("");
-        typeInfoNameBuilder << baseStr << i;
-        typeInfoName = typeInfoNameBuilder.str();
-
-        // Note: Does not seem to work without allocation
-        nextGlobal = 
-            new llvm::GlobalVariable(module, 
-                                     ourTypeInfoType, 
-                                     true, 
-                                     llvm::GlobalValue::ExternalLinkage, 
-                                     nextStruct, 
-                                     typeInfoName);
-
-        ourTypeInfoNames.push_back(typeInfoName);
-        ourTypeInfoNamesIndex[i] = typeInfoName;
-    }
-
-    ArgNames argNames;
-    ArgTypes argTypes;
-    llvm::Function* funct = NULL;
-
-    // print32Int
-
-    const llvm::Type* retType = builder.getVoidTy();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt32Ty());
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "print32Int", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // print64Int
-
-    retType = builder.getVoidTy();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt64Ty());
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "print64Int", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // printStr
-
-    retType = builder.getVoidTy();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "printStr", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // throwCppException
-
-    retType = builder.getVoidTy();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt32Ty());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "throwCppException", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // deleteOurException
-
-    retType = builder.getVoidTy();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "deleteOurException", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // createOurException
-
-    retType = builder.getInt8Ty()->getPointerTo();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt32Ty());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "createOurException", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // _Unwind_RaiseException
-
-    retType = builder.getInt32Ty();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    funct = createFunction(module, 
-                           retType, 
-                           argTypes, 
-                           argNames, 
-                           "_Unwind_RaiseException", 
-                           llvm::Function::ExternalLinkage, 
-                           true, 
-                           false);
-
-    funct->addFnAttr(llvm::Attribute::NoReturn);
-
-    // _Unwind_Resume
-
-    retType = builder.getInt32Ty();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    funct = createFunction(module, 
-                           retType, 
-                           argTypes, 
-                           argNames, 
-                           "_Unwind_Resume", 
-                           llvm::Function::ExternalLinkage, 
-                           true, 
-                           false);
-
-    funct->addFnAttr(llvm::Attribute::NoReturn);
-
-    // ourPersonality
-
-    retType = builder.getInt32Ty();
-
-    argTypes.clear();
-    argTypes.push_back(builder.getInt32Ty());
-    argTypes.push_back(builder.getInt32Ty());
-    argTypes.push_back(builder.getInt64Ty());
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-    argTypes.push_back(builder.getInt8Ty()->getPointerTo());
-
-    argNames.clear();
-
-    createFunction(module, 
-                   retType, 
-                   argTypes, 
-                   argNames, 
-                   "ourPersonality", 
-                   llvm::Function::ExternalLinkage, 
-                   true, 
-                   false);
-
-    // llvm.eh.selector intrinsic
-
-    getDeclaration(&module, llvm::Intrinsic::eh_selector);
-
-    // llvm.eh.exception intrinsic
-
-    getDeclaration(&module, llvm::Intrinsic::eh_exception);
-
-    // llvm.eh.typeid.for intrinsic
-
-    getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
+    typeInfoNameBuilder.str("");
+    typeInfoNameBuilder << baseStr << i;
+    typeInfoName = typeInfoNameBuilder.str();
+    
+    // Note: Does not seem to work without allocation
+    nextGlobal = 
+    new llvm::GlobalVariable(module, 
+                             ourTypeInfoType, 
+                             true, 
+                             llvm::GlobalValue::ExternalLinkage, 
+                             nextStruct, 
+                             typeInfoName);
+    
+    ourTypeInfoNames.push_back(typeInfoName);
+    ourTypeInfoNamesIndex[i] = typeInfoName;
+  }
+  
+  ArgNames argNames;
+  ArgTypes argTypes;
+  llvm::Function *funct = NULL;
+  
+  // print32Int
+  
+  const llvm::Type *retType = builder.getVoidTy();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt32Ty());
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "print32Int", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // print64Int
+  
+  retType = builder.getVoidTy();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt64Ty());
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "print64Int", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // printStr
+  
+  retType = builder.getVoidTy();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "printStr", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // throwCppException
+  
+  retType = builder.getVoidTy();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt32Ty());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "throwCppException", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // deleteOurException
+  
+  retType = builder.getVoidTy();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "deleteOurException", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // createOurException
+  
+  retType = builder.getInt8Ty()->getPointerTo();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt32Ty());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "createOurException", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // _Unwind_RaiseException
+  
+  retType = builder.getInt32Ty();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  funct = createFunction(module, 
+                         retType, 
+                         argTypes, 
+                         argNames, 
+                         "_Unwind_RaiseException", 
+                         llvm::Function::ExternalLinkage, 
+                         true, 
+                         false);
+  
+  funct->addFnAttr(llvm::Attribute::NoReturn);
+  
+  // _Unwind_Resume
+  
+  retType = builder.getInt32Ty();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  funct = createFunction(module, 
+                         retType, 
+                         argTypes, 
+                         argNames, 
+                         "_Unwind_Resume", 
+                         llvm::Function::ExternalLinkage, 
+                         true, 
+                         false);
+  
+  funct->addFnAttr(llvm::Attribute::NoReturn);
+  
+  // ourPersonality
+  
+  retType = builder.getInt32Ty();
+  
+  argTypes.clear();
+  argTypes.push_back(builder.getInt32Ty());
+  argTypes.push_back(builder.getInt32Ty());
+  argTypes.push_back(builder.getInt64Ty());
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+  
+  argNames.clear();
+  
+  createFunction(module, 
+                 retType, 
+                 argTypes, 
+                 argNames, 
+                 "ourPersonality", 
+                 llvm::Function::ExternalLinkage, 
+                 true, 
+                 false);
+  
+  // llvm.eh.selector intrinsic
+  
+  getDeclaration(&module, llvm::Intrinsic::eh_selector);
+  
+  // llvm.eh.exception intrinsic
+  
+  getDeclaration(&module, llvm::Intrinsic::eh_exception);
+  
+  // llvm.eh.typeid.for intrinsic
+  
+  getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
 }
 
 
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 // Main test driver code.
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
 /// Demo main routine which takes the type info types to throw. A test will
 /// be run for each given type info type. While type info types with the value 
@@ -1932,99 +1917,99 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
 /// <= 6 and >= 1 will be caught by test functions; and type info types > 6
 /// will result in exceptions which pass through to the test harness. All other
 /// type info types are not supported and could cause a crash.
-int main(int argc, char* argv[]) {
-    if (argc == 1) {
-        fprintf(stderr,
-                "\nUsage: ExceptionDemo <exception type to throw> "
-                    "[<type 2>...<type n>].\n"
-                    "   Each type must have the value of 1 - 6 for "
-                    "generated exceptions to be caught;\n"
-                    "   the value -1 for foreign C++ exceptions to be "
-                    "generated and thrown;\n"
-                    "   or the values > 6 for exceptions to be ignored.\n"
-                    "\nTry: ExceptionDemo 2 3 7 -1\n"
-                    "   for a full test.\n\n");
-        return(0);
-    }
-
-    // If not set, exception handling will not be turned on
-    llvm::JITExceptionHandling = true;
-
-    llvm::InitializeNativeTarget();
-    llvm::LLVMContext& context = llvm::getGlobalContext();
-    llvm::IRBuilder<> theBuilder(context);
-
-    // Make the module, which holds all the code.
-    llvm::Module* module = new llvm::Module("my cool jit", context);
-
-    // Build engine with JIT
-    llvm::EngineBuilder factory(module);
-    factory.setEngineKind(llvm::EngineKind::JIT);
-    factory.setAllocateGVsWithCode(false);
-    llvm::ExecutionEngine* executionEngine = factory.create();
-
-    {
-        llvm::FunctionPassManager fpm(module);
-
-        // Set up the optimizer pipeline.  
-        // Start with registering info about how the
-        // target lays out data structures.
-        fpm.add(new llvm::TargetData(*executionEngine->getTargetData()));
-
-        // Optimizations turned on
+int main(int argc, char *argv[]) {
+  if (argc == 1) {
+    fprintf(stderr,
+            "\nUsage: ExceptionDemo <exception type to throw> "
+            "[<type 2>...<type n>].\n"
+            "   Each type must have the value of 1 - 6 for "
+            "generated exceptions to be caught;\n"
+            "   the value -1 for foreign C++ exceptions to be "
+            "generated and thrown;\n"
+            "   or the values > 6 for exceptions to be ignored.\n"
+            "\nTry: ExceptionDemo 2 3 7 -1\n"
+            "   for a full test.\n\n");
+    return(0);
+  }
+  
+  // If not set, exception handling will not be turned on
+  llvm::JITExceptionHandling = true;
+  
+  llvm::InitializeNativeTarget();
+  llvm::LLVMContext &context = llvm::getGlobalContext();
+  llvm::IRBuilder<> theBuilder(context);
+  
+  // Make the module, which holds all the code.
+  llvm::Module *module = new llvm::Module("my cool jit", context);
+  
+  // Build engine with JIT
+  llvm::EngineBuilder factory(module);
+  factory.setEngineKind(llvm::EngineKind::JIT);
+  factory.setAllocateGVsWithCode(false);
+  llvm::ExecutionEngine *executionEngine = factory.create();
+  
+  {
+    llvm::FunctionPassManager fpm(module);
+    
+    // Set up the optimizer pipeline.  
+    // Start with registering info about how the
+    // target lays out data structures.
+    fpm.add(new llvm::TargetData(*executionEngine->getTargetData()));
+    
+    // Optimizations turned on
 #ifdef ADD_OPT_PASSES
-
-        // Basic AliasAnslysis support for GVN.
-        fpm.add(llvm::createBasicAliasAnalysisPass());
-
-        // Promote allocas to registers.
-        fpm.add(llvm::createPromoteMemoryToRegisterPass());
-
-        // Do simple "peephole" optimizations and bit-twiddling optzns.
-        fpm.add(llvm::createInstructionCombiningPass());
-
-        // Reassociate expressions.
-        fpm.add(llvm::createReassociatePass());
-
-        // Eliminate Common SubExpressions.
-        fpm.add(llvm::createGVNPass());
-
-        // Simplify the control flow graph (deleting unreachable 
-        // blocks, etc).
-        fpm.add(llvm::createCFGSimplificationPass());
+    
+    // Basic AliasAnslysis support for GVN.
+    fpm.add(llvm::createBasicAliasAnalysisPass());
+    
+    // Promote allocas to registers.
+    fpm.add(llvm::createPromoteMemoryToRegisterPass());
+    
+    // Do simple "peephole" optimizations and bit-twiddling optzns.
+    fpm.add(llvm::createInstructionCombiningPass());
+    
+    // Reassociate expressions.
+    fpm.add(llvm::createReassociatePass());
+    
+    // Eliminate Common SubExpressions.
+    fpm.add(llvm::createGVNPass());
+    
+    // Simplify the control flow graph (deleting unreachable 
+    // blocks, etc).
+    fpm.add(llvm::createCFGSimplificationPass());
 #endif  // ADD_OPT_PASSES
-
-        fpm.doInitialization();
-
-        // Generate test code using function throwCppException(...) as
-        // the function which throws foreign exceptions.
-        llvm::Function* toRun = 
-                          createUnwindExceptionTest(*module, 
-                                                    theBuilder, 
-                                                    fpm,
-                                                    "throwCppException");
-
-        fprintf(stderr, "\nBegin module dump:\n\n");
-
-        module->dump();
-
-        fprintf(stderr, "\nEnd module dump:\n");
-
-        fprintf(stderr, "\n\nBegin Test:\n");
-
-        for (int i = 1; i < argc; ++i) {
-            // Run test for each argument whose value is the exception
-            // type to throw.
-            runExceptionThrow(executionEngine, 
-                              toRun, 
-                              (unsigned) strtoul(argv[i], NULL, 10));
-        }
-
-        fprintf(stderr, "\nEnd Test:\n\n");
-    } 
-
-    delete executionEngine;
+    
+    fpm.doInitialization();
+    
+    // Generate test code using function throwCppException(...) as
+    // the function which throws foreign exceptions.
+    llvm::Function *toRun = 
+    createUnwindExceptionTest(*module, 
+                              theBuilder, 
+                              fpm,
+                              "throwCppException");
+    
+    fprintf(stderr, "\nBegin module dump:\n\n");
+    
+    module->dump();
+    
+    fprintf(stderr, "\nEnd module dump:\n");
+    
+    fprintf(stderr, "\n\nBegin Test:\n");
+    
+    for (int i = 1; i < argc; ++i) {
+      // Run test for each argument whose value is the exception
+      // type to throw.
+      runExceptionThrow(executionEngine, 
+                        toRun, 
+                        (unsigned) strtoul(argv[i], NULL, 10));
+    }
+    
+    fprintf(stderr, "\nEnd Test:\n\n");
+  } 
+  
+  delete executionEngine;
   
-    return 0;
+  return 0;
 }
 
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index 26b3db6..5dcc7ed 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -550,7 +550,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction->getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN->addIncoming(ThenV, ThenBB);
@@ -592,7 +592,7 @@ Value *ForExprAST::Codegen() {
   Builder.SetInsertPoint(LoopBB);
   
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
   Variable->addIncoming(StartVal, PreheaderBB);
   
   // Within the loop, the variable is defined equal to the PHI node.  If it
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index 838125a..c557699 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -654,7 +654,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction->getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN->addIncoming(ThenV, ThenBB);
@@ -696,7 +696,7 @@ Value *ForExprAST::Codegen() {
   Builder.SetInsertPoint(LoopBB);
   
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
   Variable->addIncoming(StartVal, PreheaderBB);
   
   // Within the loop, the variable is defined equal to the PHI node.  If it
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index e63578f..6afd118 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -750,7 +750,7 @@ Value *IfExprAST::Codegen() {
   // Emit merge block.
   TheFunction->getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
                                   "iftmp");
   
   PN->addIncoming(ThenV, ThenBB);
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
new file mode 100644
index 0000000..9f10973
--- /dev/null
+++ b/include/llvm-c/Disassembler.h
@@ -0,0 +1,149 @@
+/*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header provides public interface to a disassembler library.           *|
+|* LLVM provides an implementation of this interface.                         *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_DISASSEMBLER_H
+#define LLVM_C_DISASSEMBLER_H  1
+
+#include <stddef.h>
+#include "llvm/Support/DataTypes.h"
+
+/**
+ * An opaque reference to a disassembler context.
+ */
+typedef void *LLVMDisasmContextRef;
+
+/**
+ * The type for the operand information call back function.  This is called to
+ * get the symbolic information for an operand of an instruction.  Typically
+ * this is from the relocation information, symbol table, etc.  That block of
+ * information is saved when the disassembler context is created and passed to
+ * the call back in the DisInfo parameter.  The instruction containing operand
+ * is at the PC parameter.  For some instruction sets, there can be more than
+ * one operand with symbolic information.  To determine the symbolic operand
+ * information for each operand, the bytes for the specific operand in the
+ * instruction are specified by the Offset parameter and its byte widith is the
+ * size parameter.  For instructions sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and Size parameter
+ * will be the instruction width.  The information is returned in TagBuf and is 
+ * Triple specific with its specific information defined by the value of
+ * TagType for that Triple.  If symbolic information is returned the function
+ * returns 1 else it returns 0.
+ */
+typedef int (*LLVMOpInfoCallback)(void *DisInfo,
+                                  uint64_t PC,
+                                  uint64_t Offset,
+                                  uint64_t Size,
+                                  int TagType,
+                                  void *TagBuf);
+
+/**
+ * The initial support in LLVM MC for the most general form of a relocatable
+ * expression is "AddSymbol - SubtractSymbol + Offset".  For some Darwin targets
+ * this full form is encoded in the relocation information so that AddSymbol and
+ * SubtractSymbol can be link edited independent of each other.  Many other
+ * platforms only allow a relocatable expression of the form AddSymbol + Offset
+ * to be encoded.
+ * 
+ * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct
+ * LLVMOpInfo1.  The value of the relocatable expression for the operand,
+ * including any PC adjustment, is passed in to the call back in the Value
+ * field.  The symbolic information about the operand is returned using all
+ * the fields of the structure with the Offset of the relocatable expression
+ * returned in the Value field.  It is possible that some symbols in the
+ * relocatable expression were assembly temporary symbols, for example
+ * "Ldata - LpicBase + constant", and only the Values of the symbols without
+ * symbol names are present in the relocation information.  The VariantKind
+ * type is one of the Target specific #defines below and is used to print
+ * operands like "_foo@GOT", ":lower16:_foo", etc.
+ */
+struct LLVMOpInfoSymbol1 {
+  uint64_t Present; /* 1 if this symbol is present */
+  char *Name;     /* symbol name if not NULL */
+  uint64_t Value; /* symbol value if name is NULL */
+};
+struct LLVMOpInfo1 {
+  struct LLVMOpInfoSymbol1 AddSymbol;
+  struct LLVMOpInfoSymbol1 SubtractSymbol;
+  uint64_t Value;
+  uint64_t VariantKind;
+};
+
+/**
+ * The operand VariantKinds for symbolic disassembly.
+ */
+#define LLVMDisassembler_VariantKind_None 0 /* all targets */
+
+/**
+ * The ARM target VariantKinds.
+ */
+#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
+#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
+
+/**
+ * The type for the symbol lookup function.  This may be called by the
+ * disassembler for such things like adding a comment for a PC plus a constant
+ * offset load instruction to use a symbol name instead of a load address value.
+ * It is passed the block information is saved when the disassembler context is
+ * created and a value of a symbol to look up.  If no symbol is found NULL is
+ * to be returned.
+ */
+typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
+                                                uint64_t SymbolValue);
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* !defined(__cplusplus) */
+
+/**
+ * Create a disassembler for the TripleName.  Symbolic disassembly is supported
+ * by passing a block of information in the DisInfo parameter and specifing the
+ * TagType and call back functions as described above.  These can all be passed
+ * as NULL.  If successful this returns a disassembler context if not it
+ * returns NULL.
+ */
+extern LLVMDisasmContextRef
+LLVMCreateDisasm(const char *TripleName,
+                 void *DisInfo,
+                 int TagType,
+                 LLVMOpInfoCallback GetOpInfo,
+                 LLVMSymbolLookupCallback SymbolLookUp);
+
+/**
+ * Dispose of a disassembler context.
+ */
+extern void
+LLVMDisasmDispose(LLVMDisasmContextRef DC);
+
+/**
+ * Disassmble a single instruction using the disassembler context specified in
+ * the parameter DC.  The bytes of the instruction are specified in the parameter
+ * Bytes, and contains at least BytesSize number of bytes.  The instruction is
+ * at the address specified by the PC parameter.  If a valid instruction can be
+ * disassembled its string is returned indirectly in OutString which whos size
+ * is specified in the parameter OutStringSize.  This function returns the
+ * number of bytes in the instruction or zero if there was no valid instruction.
+ */
+extern size_t
+LLVMDisasmInstruction(LLVMDisasmContextRef DC,
+                      uint8_t *Bytes,
+                      uint64_t BytesSize,
+                      uint64_t PC,
+                      char *OutString,
+                      size_t OutStringSize);
+
+#ifdef __cplusplus
+}
+#endif /* !defined(__cplusplus) */
+
+#endif /* !defined(LLVM_C_DISASSEMBLER_H) */
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index 28ac0ed..0c173c2 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -44,7 +44,7 @@ typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
  @param arg An anonymous argument for client use.
  @result 0 if the register could be read; -1 otherwise.
  */
-typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, 
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
                                         void* arg);
 
 /*!
@@ -83,7 +83,7 @@ typedef void *EDTokenRef;
  Encapsulates an operand of an instruction.
  */
 typedef void *EDOperandRef;
-  
+
 /*!
  @functiongroup Getting a disassembler
  */
@@ -91,7 +91,7 @@ typedef void *EDOperandRef;
 /*!
  @function EDGetDisassembler
  Gets the disassembler for a given target.
- @param disassembler A pointer whose target will be filled in with the 
+ @param disassembler A pointer whose target will be filled in with the
    disassembler.
  @param triple Identifies the target.  Example: "x86_64-apple-darwin10"
  @param syntax The assembly syntax to use when decoding instructions.
@@ -104,12 +104,12 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler,
 /*!
  @functiongroup Generic architectural queries
  */
-  
+
 /*!
  @function EDGetRegisterName
  Gets the human-readable name for a given register.
  @param regName A pointer whose target will be pointed at the name of the
-   register.  The name does not need to be deallocated and will be 
+   register.  The name does not need to be deallocated and will be
  @param disassembler The disassembler to query for the name.
  @param regID The register identifier, as returned by EDRegisterTokenValue.
  @result 0 on success; -1 otherwise.
@@ -117,7 +117,7 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler,
 int EDGetRegisterName(const char** regName,
                       EDDisassemblerRef disassembler,
                       unsigned regID);
-  
+
 /*!
  @function EDRegisterIsStackPointer
  Determines if a register is one of the platform's stack-pointer registers.
@@ -137,16 +137,16 @@ int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
  */
 int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
                                unsigned regID);
-  
+
 /*!
  @functiongroup Creating and querying instructions
  */
-  
+
 /*!
  @function EDCreateInst
  Gets a set of contiguous instructions from a disassembler.
  @param insts A pointer to an array that will be filled in with the
-   instructions.  Must have at least count entries.  Entries not filled in will 
+   instructions.  Must have at least count entries.  Entries not filled in will
    be set to NULL.
  @param count The maximum number of instructions to fill in.
  @param disassembler The disassembler to use when decoding the instructions.
@@ -197,7 +197,7 @@ int EDGetInstString(const char **buf,
  @result 0 on success; -1 otherwise.
  */
 int EDInstID(unsigned *instID, EDInstRef inst);
-  
+
 /*!
  @function EDInstIsBranch
  @param inst The instruction to be queried.
@@ -217,7 +217,7 @@ int EDInstIsMove(EDInstRef inst);
 /*!
  @function EDBranchTargetID
  @param inst The instruction to be queried.
- @result The ID of the branch target operand, suitable for use with 
+ @result The ID of the branch target operand, suitable for use with
    EDCopyOperand.  -1 if no such operand exists.
  */
 int EDBranchTargetID(EDInstRef inst);
@@ -225,7 +225,7 @@ int EDBranchTargetID(EDInstRef inst);
 /*!
  @function EDMoveSourceID
  @param inst The instruction to be queried.
- @result The ID of the move source operand, suitable for use with 
+ @result The ID of the move source operand, suitable for use with
    EDCopyOperand.  -1 if no such operand exists.
  */
 int EDMoveSourceID(EDInstRef inst);
@@ -233,7 +233,7 @@ int EDMoveSourceID(EDInstRef inst);
 /*!
  @function EDMoveTargetID
  @param inst The instruction to be queried.
- @result The ID of the move source operand, suitable for use with 
+ @result The ID of the move source operand, suitable for use with
    EDCopyOperand.  -1 if no such operand exists.
  */
 int EDMoveTargetID(EDInstRef inst);
@@ -241,7 +241,7 @@ int EDMoveTargetID(EDInstRef inst);
 /*!
  @functiongroup Creating and querying tokens
  */
-  
+
 /*!
  @function EDNumTokens
  @param inst The instruction to be queried.
@@ -261,7 +261,7 @@ int EDNumTokens(EDInstRef inst);
 int EDGetToken(EDTokenRef *token,
                EDInstRef inst,
                int index);
-  
+
 /*!
  @function EDGetTokenString
  Gets the disassembled text for a token.
@@ -287,7 +287,7 @@ int EDOperandIndexForToken(EDTokenRef token);
  @result 1 if the token is whitespace; 0 if not; -1 on error.
  */
 int EDTokenIsWhitespace(EDTokenRef token);
-  
+
 /*!
  @function EDTokenIsPunctuation
  @param token The token to be queried.
@@ -335,18 +335,18 @@ int EDLiteralTokenAbsoluteValue(uint64_t *value,
 
 /*!
  @function EDRegisterTokenValue
- @param registerID A pointer whose target will be filled in with the LLVM 
+ @param registerID A pointer whose target will be filled in with the LLVM
    register identifier for the token.
  @param token The token to be queried.
  @result 0 on success; -1 otherwise.
  */
 int EDRegisterTokenValue(unsigned *registerID,
                          EDTokenRef token);
-  
+
 /*!
  @functiongroup Creating and querying operands
  */
-  
+
 /*!
  @function EDNumOperands
  @param inst The instruction to be queried.
@@ -366,7 +366,7 @@ int EDNumOperands(EDInstRef inst);
 int EDGetOperand(EDOperandRef *operand,
                  EDInstRef inst,
                  int index);
-  
+
 /*!
  @function EDOperandIsRegister
  @param operand The operand to be queried.
@@ -391,13 +391,13 @@ int EDOperandIsMemory(EDOperandRef operand);
 /*!
  @function EDRegisterOperandValue
  @param value A pointer whose target will be filled in with the LLVM register ID
-   of the register named by the operand.  
+   of the register named by the operand.
  @param operand The operand to be queried.
  @result 0 on success; -1 otherwise.
  */
 int EDRegisterOperandValue(unsigned *value,
                            EDOperandRef operand);
-  
+
 /*!
  @function EDImmediateOperandValue
  @param value A pointer whose target will be filled in with the value of the
@@ -427,7 +427,7 @@ int EDEvaluateOperand(uint64_t *result,
                       EDOperandRef operand,
                       EDRegisterReaderCallback regReader,
                       void *arg);
-  
+
 #ifdef __BLOCKS__
 
 /*!
@@ -458,13 +458,13 @@ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
 typedef int (^EDTokenVisitor_t)(EDTokenRef token);
 
 /*! @functiongroup Block-based interfaces */
-  
+
 /*!
  @function EDBlockCreateInsts
  Gets a set of contiguous instructions from a disassembler, using a block to
  read memory.
  @param insts A pointer to an array that will be filled in with the
-   instructions.  Must have at least count entries.  Entries not filled in will 
+   instructions.  Must have at least count entries.  Entries not filled in will
    be set to NULL.
  @param count The maximum number of instructions to fill in.
  @param disassembler The disassembler to use when decoding the instructions.
@@ -505,7 +505,7 @@ int EDBlockVisitTokens(EDInstRef inst,
                        EDTokenVisitor_t visitor);
 
 #endif
-  
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
new file mode 100644
index 0000000..6e72b59
--- /dev/null
+++ b/include/llvm-c/Object.h
@@ -0,0 +1,77 @@
+/*===-- llvm-c/Object.h - Object Lib C Iface --------------------*- C++ -*-===*/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source      */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+/*                                                                            */
+/* This header declares the C interface to libLLVMObject.a, which             */
+/* implements object file reading and writing.                                */
+/*                                                                            */
+/* Many exotic languages can interoperate with C code but have a harder time  */
+/* with C++ due to name mangling. So in addition to C, this interface enables */
+/* tools written in such languages.                                           */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_OBJECT_H
+#define LLVM_C_OBJECT_H
+
+#include "llvm-c/Core.h"
+#include "llvm/Config/llvm-config.h"
+
+#ifdef __cplusplus
+#include "llvm/Object/ObjectFile.h"
+
+extern "C" {
+#endif
+
+
+typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
+
+typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef;
+
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
+void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI);
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+                                LLVMSectionIteratorRef SI);
+void LLVMMoveToNextSection(LLVMSectionIteratorRef SI);
+const char *LLVMGetSectionName(LLVMSectionIteratorRef SI);
+uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI);
+const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI);
+
+
+#ifdef __cplusplus
+}
+
+namespace llvm {
+  namespace object {
+    inline ObjectFile *unwrap(LLVMObjectFileRef OF) {
+      return reinterpret_cast<ObjectFile*>(OF);
+    }
+
+    inline LLVMObjectFileRef wrap(const ObjectFile *OF) {
+      return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF));
+    }
+
+    inline ObjectFile::section_iterator *unwrap(LLVMSectionIteratorRef SI) {
+      return reinterpret_cast<ObjectFile::section_iterator*>(SI);
+    }
+
+    inline LLVMSectionIteratorRef
+    wrap(const ObjectFile::section_iterator *SI) {
+      return reinterpret_cast<LLVMSectionIteratorRef>
+        (const_cast<ObjectFile::section_iterator*>(SI));
+    }
+  }
+}
+
+#endif /* defined(__cplusplus) */
+
+#endif
+
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 2ddfb38..cf8d71f 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -52,6 +52,9 @@ void LLVMAddLICMPass(LLVMPassManagerRef PM);
 /** See llvm::createLoopDeletionPass function. */
 void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM);
 
+/** See llvm::createLoopIdiomPass function */
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM);
+
 /** See llvm::createLoopRotatePass function. */
 void LLVMAddLoopRotatePass(LLVMPassManagerRef PM);
 
@@ -77,6 +80,9 @@ void LLVMAddSCCPPass(LLVMPassManagerRef PM);
 void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM);
 
 /** See llvm::createScalarReplAggregatesPass function. */
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM);
+
+/** See llvm::createScalarReplAggregatesPass function. */
 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
                                                   int Threshold);
 
@@ -95,6 +101,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM);
 /** See llvm::createVerifierPass function. */
 void LLVMAddVerifierPass(LLVMPassManagerRef PM);
 
+/** See llvm::createCorrelatedValuePropagationPass function */
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM);
+
+/** See llvm::createEarlyCSEPass function */
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
+
+/** See llvm::createTypeBasedAliasAnalysisPass function */
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM);
+
+/** See llvm::createBasicAliasAnalysisPass function */
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
+
+
 #ifdef __cplusplus
 }
 #endif /* defined(__cplusplus) */
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 1c42ce0..7ea7ad0 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -72,7 +72,7 @@ lto_get_version(void);
 
 
 /**
- * Returns the last error string or NULL if last operation was sucessful.
+ * Returns the last error string or NULL if last operation was successful.
  */
 extern const char*
 lto_get_error_message(void);
@@ -127,7 +127,15 @@ lto_module_create_from_memory(const void* mem, size_t length);
  * Returns NULL on error (check lto_get_error_message() for details).
  */
 extern lto_module_t
-lto_module_create_from_fd(int fd, const char *path, off_t size);
+lto_module_create_from_fd(int fd, const char *path, size_t file_size);
+
+/**
+ * Loads an object file from disk. The seek point of fd is not preserved.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_module_t
+lto_module_create_from_fd_at_offset(int fd, const char *path, size_t file_size,
+                                    size_t map_size, off_t offset);
 
 
 /**
@@ -255,7 +263,7 @@ lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path);
 
 /**
  * Generates code for all added modules into one native object file.
- * On sucess returns a pointer to a generated mach-o/ELF buffer and
+ * On success returns a pointer to a generated mach-o/ELF buffer and
  * length set to the buffer size.  The buffer is owned by the 
  * lto_code_gen_t and will be freed when lto_codegen_dispose()
  * is called, or lto_codegen_compile() is called again.
@@ -264,6 +272,13 @@ lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path);
 extern const void*
 lto_codegen_compile(lto_code_gen_t cg, size_t* length);
 
+/**
+ * Generates code for all added modules into one native object file.
+ * The name of the file is written to name. Returns true on error.
+ */
+extern bool
+lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name);
+
 
 /**
  * Sets options to help debug codegen bugs.
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index ca4138b..21b8c86 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -353,6 +353,10 @@ namespace llvm {
                   unsigned FormatPrecision = 0,
                   unsigned FormatMaxPadding = 3) const;
 
+    /// getExactInverse - If this value has an exact multiplicative inverse,
+    /// store it in inv and return true.
+    bool getExactInverse(APFloat *inv) const;
+
   private:
 
     /* Trivial queries.  */
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index d1fd3e5..2feef07 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -818,6 +818,7 @@ public:
   APInt usub_ov(const APInt &RHS, bool &Overflow) const;
   APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
   APInt smul_ov(const APInt &RHS, bool &Overflow) const;
+  APInt umul_ov(const APInt &RHS, bool &Overflow) const;
   APInt sshl_ov(unsigned Amt, bool &Overflow) const;
 
   /// @returns the bit value at bitPosition
@@ -1372,7 +1373,7 @@ public:
 
   /// Calculate the magic number for unsigned division by a constant.
   struct mu;
-  mu magicu() const;
+  mu magicu(unsigned LeadingZeros = 0) const;
 
   /// @}
   /// @name Building-block Operations for APInt and APFloat
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index d3ea9c0..97e42cb 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -22,8 +22,8 @@ namespace llvm {
   ///
   /// This class does not own the underlying data, it is expected to be used in
   /// situations where the data resides in some other buffer, whose lifetime
-  /// extends past that of the StringRef. For this reason, it is not in general
-  /// safe to store a ArrayRef.
+  /// extends past that of the ArrayRef. For this reason, it is not in general
+  /// safe to store an ArrayRef.
   ///
   /// This is intended to be trivially copyable, so it should be passed by
   /// value.
@@ -79,6 +79,8 @@ namespace llvm {
     /// empty - Check if the array is empty.
     bool empty() const { return Length == 0; }
     
+    const T *data() const { return Data; }
+    
     /// size - Get the array size.
     size_t size() const { return Length; }
     
@@ -94,10 +96,22 @@ namespace llvm {
       return Data[Length-1];
     }
     
+    /// slice(n) - Chop off the first N elements of the array.
+    ArrayRef<T> slice(unsigned N) {
+      assert(N <= size() && "Invalid specifier");
+      return ArrayRef<T>(data()+N, size()-N);
+    }
+
+    /// slice(n, m) - Chop off the first N elements of the array, and keep M
+    /// elements in the array.
+    ArrayRef<T> slice(unsigned N, unsigned M) {
+      assert(N+M <= size() && "Invalid specifier");
+      return ArrayRef<T>(data()+N, M);
+    }
+    
     /// @}
     /// @name Operator Overloads
     /// @{
-    
     const T &operator[](size_t Index) const {
       assert(Index < Length && "Invalid index!");
       return Data[Index];
@@ -106,7 +120,6 @@ namespace llvm {
     /// @}
     /// @name Expensive Operations
     /// @{
-    
     std::vector<T> vec() const {
       return std::vector<T>(Data, Data+Length);
     }
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 61d6ae7..0f1cfeb 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -53,13 +53,13 @@ public:
     CopyFrom(other);
   }
 
-  explicit DenseMap(unsigned NumInitBuckets = 64) {
+  explicit DenseMap(unsigned NumInitBuckets = 0) {
     init(NumInitBuckets);
   }
 
   template<typename InputIt>
   DenseMap(const InputIt &I, const InputIt &E) {
-    init(64);
+    init(NextPowerOf2(std::distance(I, E)));
     insert(I, E);
   }
   
@@ -72,7 +72,8 @@ public:
       P->first.~KeyT();
     }
 #ifndef NDEBUG
-    memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
+    if (NumBuckets)
+      memset((void*)Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
 #endif
     operator delete(Buckets);
   }
@@ -98,7 +99,10 @@ public:
   unsigned size() const { return NumEntries; }
 
   /// Grow the densemap so that it has at least Size buckets. Does not shrink
-  void resize(size_t Size) { grow(Size); }
+  void resize(size_t Size) {
+    if (Size > NumBuckets)
+      grow(Size);
+  }
 
   void clear() {
     if (NumEntries == 0 && NumTombstones == 0) return;
@@ -248,23 +252,29 @@ private:
 
     if (NumBuckets) {
 #ifndef NDEBUG
-      memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
+      memset((void*)Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
 #endif
       operator delete(Buckets);
     }
-    Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) *
-                                                 other.NumBuckets));
+
+    NumBuckets = other.NumBuckets;
+
+    if (NumBuckets == 0) {
+      Buckets = 0;
+      return;
+    }
+
+    Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) * NumBuckets));
 
     if (isPodLike<KeyInfoT>::value && isPodLike<ValueInfoT>::value)
-      memcpy(Buckets, other.Buckets, other.NumBuckets * sizeof(BucketT));
+      memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT));
     else
-      for (size_t i = 0; i < other.NumBuckets; ++i) {
+      for (size_t i = 0; i < NumBuckets; ++i) {
         new (&Buckets[i].first) KeyT(other.Buckets[i].first);
         if (!KeyInfoT::isEqual(Buckets[i].first, getEmptyKey()) &&
             !KeyInfoT::isEqual(Buckets[i].first, getTombstoneKey()))
           new (&Buckets[i].second) ValueT(other.Buckets[i].second);
       }
-    NumBuckets = other.NumBuckets;
   }
 
   BucketT *InsertIntoBucket(const KeyT &Key, const ValueT &Value,
@@ -279,11 +289,14 @@ private:
     // table completely filled with tombstones, no lookup would ever succeed,
     // causing infinite loops in lookup.
     ++NumEntries;
-    if (NumEntries*4 >= NumBuckets*3 ||
-        NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) {
+    if (NumEntries*4 >= NumBuckets*3) {
       this->grow(NumBuckets * 2);
       LookupBucketFor(Key, TheBucket);
     }
+    if (NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) {
+      this->grow(NumBuckets);
+      LookupBucketFor(Key, TheBucket);
+    }
 
     // If we are writing over a tombstone, remember this.
     if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey()))
@@ -313,6 +326,11 @@ private:
     unsigned ProbeAmt = 1;
     BucketT *BucketsPtr = Buckets;
 
+    if (NumBuckets == 0) {
+      FoundBucket = 0;
+      return false;
+    }
+
     // FoundTombstone - Keep track of whether we find a tombstone while probing.
     BucketT *FoundTombstone = 0;
     const KeyT EmptyKey = getEmptyKey();
@@ -354,6 +372,12 @@ private:
     NumEntries = 0;
     NumTombstones = 0;
     NumBuckets = InitBuckets;
+
+    if (InitBuckets == 0) {
+      Buckets = 0;
+      return;
+    }
+
     assert(InitBuckets && (InitBuckets & (InitBuckets-1)) == 0 &&
            "# initial buckets must be a power of two!");
     Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT)*InitBuckets));
@@ -367,6 +391,9 @@ private:
     unsigned OldNumBuckets = NumBuckets;
     BucketT *OldBuckets = Buckets;
 
+    if (NumBuckets < 64)
+      NumBuckets = 64;
+
     // Double the number of buckets.
     while (NumBuckets < AtLeast)
       NumBuckets <<= 1;
@@ -398,7 +425,8 @@ private:
     }
 
 #ifndef NDEBUG
-    memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
+    if (OldNumBuckets)
+      memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
 #endif
     // Free the old table.
     operator delete(OldBuckets);
@@ -431,13 +459,22 @@ private:
     }
 
 #ifndef NDEBUG
-    memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
+    memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets);
 #endif
     // Free the old table.
     operator delete(OldBuckets);
 
     NumEntries = 0;
   }
+  
+public:
+  /// Return the approximate size (in bytes) of the actual map.
+  /// This is just the raw memory used by DenseMap.
+  /// If entries are pointers to objects, the size of the referenced objects
+  /// are not included.
+  size_t getMemorySize() const {
+    return NumBuckets * sizeof(BucketT);
+  }
 };
 
 template<typename KeyT, typename ValueT,
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 25e341b..744b6f4 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -157,7 +157,10 @@ struct DenseMapInfo<std::pair<T, U> > {
     key ^= (key >> 31);
     return (unsigned)key;
   }
-  static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; }
+  static bool isEqual(const Pair &LHS, const Pair &RHS) {
+    return FirstInfo::isEqual(LHS.first, RHS.first) && 
+           SecondInfo::isEqual(LHS.second, RHS.second);
+  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index b9e5cbd..dd13a2c 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -143,8 +143,7 @@ public:
   static inline _Self end(const GraphT& G, SetType &S) { return _Self(S); }
 
   inline bool operator==(const _Self& x) const {
-    return VisitStack.size() == x.VisitStack.size() &&
-           VisitStack == x.VisitStack;
+    return VisitStack == x.VisitStack;
   }
   inline bool operator!=(const _Self& x) const { return !operator==(x); }
 
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 879dbd0..52e0434 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -209,10 +209,10 @@ template<typename T> struct FoldingSetTrait;
 /// for FoldingSetTrait implementations.
 ///
 template<typename T> struct DefaultFoldingSetTrait {
-  static void Profile(const T& X, FoldingSetNodeID& ID) {
+  static void Profile(const T &X, FoldingSetNodeID &ID) {
     X.Profile(ID);
   }
-  static void Profile(T& X, FoldingSetNodeID& ID) {
+  static void Profile(T &X, FoldingSetNodeID &ID) {
     X.Profile(ID);
   }
 
@@ -267,7 +267,7 @@ template<typename T, typename Ctx> struct ContextualFoldingSetTrait
 /// is often much larger than necessary, and the possibility of heap
 /// allocation means it requires a non-trivial destructor call.
 class FoldingSetNodeIDRef {
-  const unsigned* Data;
+  const unsigned *Data;
   size_t Size;
 public:
   FoldingSetNodeIDRef() : Data(0), Size(0) {}
@@ -310,9 +310,10 @@ public:
   void AddInteger(unsigned long long I);
   void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); }
   void AddString(StringRef String);
+  void AddNodeID(const FoldingSetNodeID &ID);
 
   template <typename T>
-  inline void Add(const T& x) { FoldingSetTrait<T>::Profile(x, *this); }
+  inline void Add(const T &x) { FoldingSetTrait<T>::Profile(x, *this); }
 
   /// clear - Clear the accumulated profile, allowing this FoldingSetNodeID
   /// object to be used to compute a new profile.
@@ -548,7 +549,7 @@ public:
     return static_cast<T*>(NodePtr);
   }
 
-  inline FoldingSetIterator& operator++() {          // Preincrement
+  inline FoldingSetIterator &operator++() {          // Preincrement
     advance();
     return *this;
   }
@@ -596,10 +597,10 @@ public:
   FoldingSetBucketIterator(void **Bucket, bool) :
     FoldingSetBucketIteratorImpl(Bucket, true) {}
 
-  T& operator*() const { return *static_cast<T*>(Ptr); }
-  T* operator->() const { return static_cast<T*>(Ptr); }
+  T &operator*() const { return *static_cast<T*>(Ptr); }
+  T *operator->() const { return static_cast<T*>(Ptr); }
 
-  inline FoldingSetBucketIterator& operator++() { // Preincrement
+  inline FoldingSetBucketIterator &operator++() { // Preincrement
     advance();
     return *this;
   }
@@ -615,36 +616,36 @@ template <typename T>
 class FoldingSetNodeWrapper : public FoldingSetNode {
   T data;
 public:
-  explicit FoldingSetNodeWrapper(const T& x) : data(x) {}
+  explicit FoldingSetNodeWrapper(const T &x) : data(x) {}
   virtual ~FoldingSetNodeWrapper() {}
 
   template<typename A1>
-  explicit FoldingSetNodeWrapper(const A1& a1)
+  explicit FoldingSetNodeWrapper(const A1 &a1)
     : data(a1) {}
 
   template <typename A1, typename A2>
-  explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2)
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2)
     : data(a1,a2) {}
 
   template <typename A1, typename A2, typename A3>
-  explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3)
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3)
     : data(a1,a2,a3) {}
 
   template <typename A1, typename A2, typename A3, typename A4>
-  explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3,
-                                 const A4& a4)
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3,
+                                 const A4 &a4)
     : data(a1,a2,a3,a4) {}
 
   template <typename A1, typename A2, typename A3, typename A4, typename A5>
-  explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3,
-                                 const A4& a4, const A5& a5)
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3,
+                                 const A4 &a4, const A5 &a5)
   : data(a1,a2,a3,a4,a5) {}
 
 
-  void Profile(FoldingSetNodeID& ID) { FoldingSetTrait<T>::Profile(data, ID); }
+  void Profile(FoldingSetNodeID &ID) { FoldingSetTrait<T>::Profile(data, ID); }
 
-  T& getValue() { return data; }
-  const T& getValue() const { return data; }
+  T &getValue() { return data; }
+  const T &getValue() const { return data; }
 
   operator T&() { return data; }
   operator const T&() const { return data; }
@@ -661,20 +662,22 @@ class FastFoldingSetNode : public FoldingSetNode {
 protected:
   explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {}
 public:
-  void Profile(FoldingSetNodeID& ID) const { ID = FastID; }
+  void Profile(FoldingSetNodeID &ID) const { 
+    ID.AddNodeID(FastID); 
+  }
 };
 
 //===----------------------------------------------------------------------===//
 // Partial specializations of FoldingSetTrait.
 
 template<typename T> struct FoldingSetTrait<T*> {
-  static inline void Profile(const T* X, FoldingSetNodeID& ID) {
+  static inline void Profile(const T *X, FoldingSetNodeID &ID) {
     ID.AddPointer(X);
   }
 };
 
 template<typename T> struct FoldingSetTrait<const T*> {
-  static inline void Profile(const T* X, FoldingSetNodeID& ID) {
+  static inline void Profile(const T *X, FoldingSetNodeID &ID) {
     ID.AddPointer(X);
   }
 };
diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h
index 0d8fcf3..fa7ccb9 100644
--- a/include/llvm/ADT/ImmutableIntervalMap.h
+++ b/include/llvm/ADT/ImmutableIntervalMap.h
@@ -10,6 +10,10 @@
 // This file defines the ImmutableIntervalMap class.
 //
 //===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H
+#define LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H
+
 #include "llvm/ADT/ImmutableMap.h"
 
 namespace llvm {
@@ -240,3 +244,5 @@ private:
 };
 
 } // end namespace llvm
+
+#endif
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 79f24d3..f28ebf3 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -1328,6 +1328,10 @@ public:
   /// const_iterator - Create an iterator that isn't pointing anywhere.
   const_iterator() : map(0) {}
 
+  /// setMap - Change the map iterated over. This call must be followed by a
+  /// call to goToBegin(), goToEnd(), or find()
+  void setMap(const IntervalMap &m) { map = const_cast<IntervalMap*>(&m); }
+
   /// valid - Return true if the current position is valid, false for end().
   bool valid() const { return path.valid(); }
 
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 37d4ac9..2f6fd2b 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -42,18 +42,16 @@ namespace llvm {
 //===----------------------------------------------------------------------===//
   template <class Derived>
   class RefCountedBase {
-    unsigned ref_cnt;
+    mutable unsigned ref_cnt;
 
-  protected:
+  public:
     RefCountedBase() : ref_cnt(0) {}
 
-    void Retain() { ++ref_cnt; }
-    void Release() {
+    void Retain() const { ++ref_cnt; }
+    void Release() const {
       assert (ref_cnt > 0 && "Reference count is already zero.");
-      if (--ref_cnt == 0) delete static_cast<Derived*>(this);
+      if (--ref_cnt == 0) delete static_cast<const Derived*>(this);
     }
-
-    friend class IntrusiveRefCntPtr<Derived>;
   };
 
 //===----------------------------------------------------------------------===//
@@ -64,21 +62,21 @@ namespace llvm {
 ///  inherit from RefCountedBaseVPTR can't be allocated on stack -
 ///  attempting to do this will produce a compile error.
 //===----------------------------------------------------------------------===//
-  template <class Derived>
   class RefCountedBaseVPTR {
-    unsigned ref_cnt;
+    mutable unsigned ref_cnt;
 
   protected:
     RefCountedBaseVPTR() : ref_cnt(0) {}
     virtual ~RefCountedBaseVPTR() {}
 
-    void Retain() { ++ref_cnt; }
-    void Release() {
+    void Retain() const { ++ref_cnt; }
+    void Release() const {
       assert (ref_cnt > 0 && "Reference count is already zero.");
       if (--ref_cnt == 0) delete this;
     }
 
-    friend class IntrusiveRefCntPtr<Derived>;
+    template <typename T>
+    friend class IntrusiveRefCntPtr;
   };
 
 //===----------------------------------------------------------------------===//
@@ -155,6 +153,10 @@ namespace llvm {
       other.Obj = Obj;
       Obj = tmp;
     }
+    
+    void resetWithoutRelease() {
+      Obj = 0;
+    }
 
   private:
     void retain() { if (Obj) Obj->Retain(); }
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 61de042..13b98ce 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -19,16 +19,33 @@
 
 namespace llvm {
 
-  /// getPointerUnionTypeNum - If the argument has type PT1* or PT2* return
-  /// false or true respectively.
-  template <typename PT1, typename PT2>
-  static inline int getPointerUnionTypeNum(PT1 *P) { return 0; }
-  template <typename PT1, typename PT2>
-  static inline int getPointerUnionTypeNum(PT2 *P) { return 1; }
-  template <typename PT1, typename PT2>
-  static inline int getPointerUnionTypeNum(...) { return -1; }
-  
-  
+  template <typename T>
+  struct PointerUnionTypeSelectorReturn {
+    typedef T Return;
+  };
+
+  /// \brief Get a type based on whether two types are the same or not. For:
+  /// @code
+  /// typedef typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return Ret;
+  /// @endcode
+  /// Ret will be EQ type if T1 is same as T2 or NE type otherwise.
+  template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
+  struct PointerUnionTypeSelector {
+    typedef typename PointerUnionTypeSelectorReturn<RET_NE>::Return Return;
+  };
+
+  template <typename T, typename RET_EQ, typename RET_NE>
+  struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> {
+    typedef typename PointerUnionTypeSelectorReturn<RET_EQ>::Return Return;
+  };
+
+  template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
+  struct PointerUnionTypeSelectorReturn<
+                            PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE> > {
+    typedef typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return
+        Return;
+  };
+
   /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
   /// for the two template arguments.
   template <typename PT1, typename PT2>
@@ -65,6 +82,16 @@ namespace llvm {
                            PointerUnionUIntTraits<PT1,PT2> > ValTy;
   private:
     ValTy Val;
+
+    struct IsPT1 {
+      static const int Num = 0;
+    };
+    struct IsPT2 {
+      static const int Num = 1;
+    };
+    template <typename T>
+    struct UNION_DOESNT_CONTAIN_TYPE { };
+
   public:
     PointerUnion() {}
     
@@ -87,8 +114,11 @@ namespace llvm {
     /// is<T>() return true if the Union currently holds the type matching T.
     template<typename T>
     int is() const {
-      int TyNo = ::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0);
-      assert(TyNo != -1 && "Type query could never succeed on PointerUnion!");
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, IsPT1,
+          ::llvm::PointerUnionTypeSelector<PT2, T, IsPT2,
+                                    UNION_DOESNT_CONTAIN_TYPE<T> > >::Return Ty;
+      int TyNo = Ty::Num;
       return static_cast<int>(Val.getInt()) == TyNo;
     }
     
@@ -175,6 +205,34 @@ namespace llvm {
     typedef PointerUnion<InnerUnion, PT3> ValTy;
   private:
     ValTy Val;
+
+    struct IsInnerUnion {
+      ValTy Val;
+      IsInnerUnion(ValTy val) : Val(val) { }
+      template<typename T>
+      int is() const {
+        return Val.template is<InnerUnion>() && 
+               Val.template get<InnerUnion>().template is<T>();
+      }
+      template<typename T>
+      T get() const {
+        return Val.template get<InnerUnion>().template get<T>();
+      }
+    };
+
+    struct IsPT3 {
+      ValTy Val;
+      IsPT3(ValTy val) : Val(val) { }
+      template<typename T>
+      int is() const {
+        return Val.template is<T>();
+      }
+      template<typename T>
+      T get() const {
+        return Val.template get<T>();
+      }
+    };
+
   public:
     PointerUnion3() {}
     
@@ -196,11 +254,12 @@ namespace llvm {
     /// is<T>() return true if the Union currently holds the type matching T.
     template<typename T>
     int is() const {
-      // Is it PT1/PT2?
-      if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
-        return Val.template is<InnerUnion>() && 
-               Val.template get<InnerUnion>().template is<T>();
-      return Val.template is<T>();
+      // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
+          ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
+                                                                   >::Return Ty;
+      return Ty(Val).is<T>();
     }
     
     /// get<T>() - Return the value of the specified pointer type. If the
@@ -208,11 +267,12 @@ namespace llvm {
     template<typename T>
     T get() const {
       assert(is<T>() && "Invalid accessor called");
-      // Is it PT1/PT2?
-      if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
-        return Val.template get<InnerUnion>().template get<T>();
-      
-      return Val.template get<T>();
+      // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
+          ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
+                                                                   >::Return Ty;
+      return Ty(Val).get<T>();
     }
     
     /// dyn_cast<T>() - If the current value is of the specified pointer type,
@@ -302,12 +362,13 @@ namespace llvm {
     /// is<T>() return true if the Union currently holds the type matching T.
     template<typename T>
     int is() const {
-      // Is it PT1/PT2?
-      if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
-        return Val.template is<InnerUnion1>() && 
-               Val.template get<InnerUnion1>().template is<T>();
-      return Val.template is<InnerUnion2>() && 
-             Val.template get<InnerUnion2>().template is<T>();
+      // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
+          ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
+                                                                   >::Return Ty;
+      return Val.template is<Ty>() && 
+             Val.template get<Ty>().template is<T>();
     }
     
     /// get<T>() - Return the value of the specified pointer type. If the
@@ -315,11 +376,12 @@ namespace llvm {
     template<typename T>
     T get() const {
       assert(is<T>() && "Invalid accessor called");
-      // Is it PT1/PT2?
-      if (::llvm::getPointerUnionTypeNum<PT1, PT2>((T*)0) != -1)
-        return Val.template get<InnerUnion1>().template get<T>();
-      
-      return Val.template get<InnerUnion2>().template get<T>();
+      // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
+          ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
+                                                                   >::Return Ty;
+      return Val.template get<Ty>().template get<T>();
     }
     
     /// dyn_cast<T>() - If the current value is of the specified pointer type,
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index af3c482..a6803ee 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -96,6 +96,9 @@ public:
   ScopedHashTableScope(ScopedHashTable<K, V, KInfo, AllocatorTy> &HT);
   ~ScopedHashTableScope();
 
+  ScopedHashTableScope *getParentScope() { return PrevScope; }
+  const ScopedHashTableScope *getParentScope() const { return PrevScope; }
+  
 private:
   friend class ScopedHashTable<K, V, KInfo, AllocatorTy>;
   ScopedHashTableVal<K, V> *getLastValInScope() {
@@ -141,9 +144,14 @@ public:
 
 template <typename K, typename V, typename KInfo, typename AllocatorTy>
 class ScopedHashTable {
+public:
+  /// ScopeTy - This is a helpful typedef that allows clients to get easy access
+  /// to the name of the scope for this hash table.
+  typedef ScopedHashTableScope<K, V, KInfo, AllocatorTy> ScopeTy;
+private:
   typedef ScopedHashTableVal<K, V> ValTy;
   DenseMap<K, ValTy*, KInfo> TopLevelMap;
-  ScopedHashTableScope<K, V, KInfo, AllocatorTy> *CurScope;
+  ScopeTy *CurScope;
   
   AllocatorTy Allocator;
   
@@ -157,9 +165,6 @@ public:
     assert(CurScope == 0 && TopLevelMap.empty() && "Scope imbalance!");
   }
   
-  /// ScopeTy - This is a helpful typedef that allows clients to get easy access
-  /// to the name of the scope for this hash table.
-  typedef ScopedHashTableScope<K, V, KInfo, AllocatorTy> ScopeTy;
 
   /// Access to the allocator.
   typedef typename ReferenceAdder<AllocatorTy>::result AllocatorRefTy;
@@ -180,13 +185,7 @@ public:
   }
 
   void insert(const K &Key, const V &Val) {
-    assert(CurScope && "No scope active!");
-
-    ScopedHashTableVal<K, V> *&KeyEntry = TopLevelMap[Key];
-
-    KeyEntry = ValTy::Create(CurScope->getLastValInScope(), KeyEntry, Key, Val,
-                             Allocator);
-    CurScope->setLastValInScope(KeyEntry);
+    insertIntoScope(CurScope, Key, Val);
   }
 
   typedef ScopedHashTableIterator<K, V, KInfo> iterator;
@@ -199,6 +198,21 @@ public:
     if (I == TopLevelMap.end()) return end();
     return iterator(I->second);
   }
+  
+  ScopeTy *getCurScope() { return CurScope; }
+  const ScopeTy *getCurScope() const { return CurScope; }
+
+  /// insertIntoScope - This inserts the specified key/value at the specified
+  /// (possibly not the current) scope.  While it is ok to insert into a scope
+  /// that isn't the current one, it isn't ok to insert *underneath* an existing
+  /// value of the specified key.
+  void insertIntoScope(ScopeTy *S, const K &Key, const V &Val) {
+    assert(S && "No scope active!");
+    ScopedHashTableVal<K, V> *&KeyEntry = TopLevelMap[Key];
+    KeyEntry = ValTy::Create(S->getLastValInScope(), KeyEntry, Key, Val,
+                             Allocator);
+    S->setLastValInScope(KeyEntry);
+  }
 };
 
 /// ScopedHashTableScope ctor - Install this as the current scope for the hash
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index ff32ba8..9992858 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -133,7 +133,7 @@ private:
   void shrink_and_clear();
 
   /// Grow - Allocate a larger backing store for the buckets and move it over.
-  void Grow();
+  void Grow(unsigned NewSize);
 
   void operator=(const SmallPtrSetImpl &RHS);  // DO NOT IMPLEMENT.
 protected:
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index f137ea2..fda99c6 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -121,6 +121,9 @@ protected:
 /// \brief Enable the collection and printing of statistics.
 void EnableStatistics();
 
+/// \brief Check if statistics are enabled.
+bool AreStatisticsEnabled();
+
 /// \brief Print statistics to the file returned by CreateInfoOutputFile().
 void PrintStatistics();
 
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index acbed66..5f5c041 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -20,7 +20,6 @@
 #include <cctype>
 #include <cstdio>
 #include <string>
-#include <vector>
 
 namespace llvm {
 template<typename T> class SmallVectorImpl;
@@ -153,7 +152,7 @@ void SplitString(StringRef Source,
                  SmallVectorImpl<StringRef> &OutFragments,
                  StringRef Delimiters = " \t\n\v\f\r");
 
-/// HashString - Hash funtion for strings.
+/// HashString - Hash function for strings.
 ///
 /// This is the Bernstein hash function.
 //
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index bad0e6f..934cacc 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -17,7 +17,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
 #include <cstring>
-#include <string>
 
 namespace llvm {
   template<typename ValueT>
@@ -81,16 +80,6 @@ protected:
   StringMapImpl(unsigned InitSize, unsigned ItemSize);
   void RehashTable();
 
-  /// ShouldRehash - Return true if the table should be rehashed after a new
-  /// element was recently inserted.
-  bool ShouldRehash() const {
-    // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
-    // the buckets are empty (meaning that many are filled with tombstones),
-    // grow the table.
-    return NumItems*4 > NumBuckets*3 ||
-           NumBuckets-(NumItems+NumTombstones) < NumBuckets/8;
-  }
-
   /// LookupBucketFor - Look up the bucket that the specified string should end
   /// up in.  If it already exists as a key in the map, the Item pointer for the
   /// specified bucket will be non-null.  Otherwise, it will be null.  In either
@@ -339,9 +328,9 @@ public:
       --NumTombstones;
     Bucket.Item = KeyValue;
     ++NumItems;
+    assert(NumItems + NumTombstones <= NumBuckets);
 
-    if (ShouldRehash())
-      RehashTable();
+    RehashTable();
     return true;
   }
 
@@ -359,6 +348,7 @@ public:
     }
 
     NumItems = 0;
+    NumTombstones = 0;
   }
 
   /// GetOrCreateValue - Look up the specified key in the table.  If a value
@@ -378,13 +368,13 @@ public:
     if (Bucket.Item == getTombstoneVal())
       --NumTombstones;
     ++NumItems;
+    assert(NumItems + NumTombstones <= NumBuckets);
 
     // Fill in the bucket for the hash table.  The FullHashValue was already
     // filled in by LookupBucketFor.
     Bucket.Item = NewItem;
 
-    if (ShouldRehash())
-      RehashTable();
+    RehashTable();
     return *NewItem;
   }
 
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index e6dcc23..2659bce 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -64,7 +64,8 @@ public:
     x86_64,  // X86-64: amd64, x86_64
     xcore,   // XCore: xcore
     mblaze,  // MBlaze: mblaze
-    ptx,     // PTX: ptx
+    ptx32,   // PTX: ptx (32-bit)
+    ptx64,   // PTX: ptx (64-bit)
 
     InvalidArch
   };
@@ -72,7 +73,8 @@ public:
     UnknownVendor,
 
     Apple,
-    PC
+    PC,
+    SCEI
   };
   enum OSType {
     UnknownOS,
@@ -82,8 +84,10 @@ public:
     Darwin,
     DragonFly,
     FreeBSD,
+    IOS,
     Linux,
     Lv2,        // PS3
+    MacOSX,
     MinGW32,    // i*86-pc-mingw32, *-w64-mingw32
     NetBSD,
     OpenBSD,
@@ -221,21 +225,81 @@ public:
   /// if the environment component is present).
   StringRef getOSAndEnvironmentName() const;
 
+  /// getOSNumber - Parse the version number from the OS name component of the
+  /// triple, if present.
+  ///
+  /// For example, "fooos1.2.3" would return (1, 2, 3).
+  ///
+  /// If an entry is not defined, it will be returned as 0.
+  void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const;
 
-  /// getDarwinNumber - Parse the 'darwin number' out of the specific target
-  /// triple.  For example, if we have darwin8.5 return 8,5,0.  If any entry is
-  /// not defined, return 0's.  This requires that the triple have an OSType of
-  /// darwin before it is called.
-  void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const;
-
-  /// getDarwinMajorNumber - Return just the major version number, this is
+  /// getOSMajorVersion - Return just the major version number, this is
   /// specialized because it is a common query.
-  unsigned getDarwinMajorNumber() const {
-    unsigned Maj, Min, Rev;
-    getDarwinNumber(Maj, Min, Rev);
+  unsigned getOSMajorVersion() const {
+    unsigned Maj, Min, Micro;
+    getDarwinNumber(Maj, Min, Micro);
     return Maj;
   }
 
+  void getDarwinNumber(unsigned &Major, unsigned &Minor,
+                       unsigned &Micro) const {
+    return getOSVersion(Major, Minor, Micro);
+  }
+
+  unsigned getDarwinMajorNumber() const {
+    return getOSMajorVersion();
+  }
+
+  /// isOSVersionLT - Helper function for doing comparisons against version
+  /// numbers included in the target triple.
+  bool isOSVersionLT(unsigned Major, unsigned Minor = 0,
+                     unsigned Micro = 0) const {
+    unsigned LHS[3];
+    getOSVersion(LHS[0], LHS[1], LHS[2]);
+
+    if (LHS[0] != Major)
+      return LHS[0] < Major;
+    if (LHS[1] != Minor)
+      return LHS[1] < Minor;
+    if (LHS[2] != Micro)
+      return LHS[1] < Micro;
+
+    return false;
+  }
+
+  /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both
+  /// "darwin" and "osx" as OS X triples.
+  bool isMacOSX() const {
+    return getOS() == Triple::Darwin || getOS() == Triple::MacOSX;
+  }
+
+  /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS).
+  bool isOSDarwin() const {
+    return isMacOSX() ||getOS() == Triple::IOS;
+  }
+
+  /// isOSWindows - Is this a "Windows" OS.
+  bool isOSWindows() const {
+    return getOS() == Triple::Win32 || getOS() == Triple::Cygwin ||
+      getOS() == Triple::MinGW32;
+  }
+
+  /// isMacOSXVersionLT - Comparison function for checking OS X version
+  /// compatibility, which handles supporting skewed version numbering schemes
+  /// used by the "darwin" triples.
+  unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
+                          unsigned Micro = 0) const {
+    assert(isMacOSX() && "Not an OS X triple!");
+
+    // If this is OS X, expect a sane version number.
+    if (getOS() == Triple::MacOSX)
+      return isOSVersionLT(Major, Minor, Micro);
+
+    // Otherwise, compare to the "Darwin" number.
+    assert(Major == 10 && "Unexpected major version");
+    return isOSVersionLT(Minor + 4, Micro, 0);
+  }
+    
   /// @}
   /// @name Mutators
   /// @{
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 865fcb3..bcacfd9 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -289,7 +289,7 @@ template<typename NodeTy> struct simplify_type<const ilist_iterator<NodeTy> > {
 //===----------------------------------------------------------------------===//
 //
 /// iplist - The subset of list functionality that can safely be used on nodes
-/// of polymorphic types, i.e. a heterogenous list with a common base class that
+/// of polymorphic types, i.e. a heterogeneous list with a common base class that
 /// holds the next/prev pointers.  The only state of the list itself is a single
 /// pointer to the head of the list.
 ///
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 71a5982..8f9708b 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -38,7 +38,6 @@
 #define LLVM_ANALYSIS_ALIAS_ANALYSIS_H
 
 #include "llvm/Support/CallSite.h"
-#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index e844d10..03149c6 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -259,6 +259,7 @@ private:
       if (CallSites[i] == CS.getInstruction()) {
         CallSites[i] = CallSites.back();
         CallSites.pop_back();
+        --i; --e;  // Revisit the moved entry.
       }
   }
   void setVolatile() { Volatile = true; }
@@ -283,6 +284,7 @@ class AliasSetTracker {
   class ASTCallbackVH : public CallbackVH {
     AliasSetTracker *AST;
     virtual void deleted();
+    virtual void allUsesReplacedWith(Value *);
   public:
     ASTCallbackVH(Value *V, AliasSetTracker *AST = 0);
     ASTCallbackVH &operator=(Value *V);
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index ac8f596..61614e3 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_ANALYSIS_CFGPRINTER_H
 #define LLVM_ANALYSIS_CFGPRINTER_H
 
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Assembly/Writer.h"
diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h
index 417dbc4..5846dbf 100644
--- a/include/llvm/Analysis/DIBuilder.h
+++ b/include/llvm/Analysis/DIBuilder.h
@@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_DIBUILDER_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
@@ -146,6 +147,30 @@ namespace llvm {
                             uint64_t AlignInBits, uint64_t OffsetInBits, 
                             unsigned Flags, DIType Ty);
 
+    /// createObjCIVar - Create debugging information entry for Objective-C
+    /// instance variable.
+    /// @param Name         Member name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param OffsetInBits Member offset.
+    /// @param Flags        Flags to encode member attribute, e.g. private
+    /// @param Ty           Parent type.
+    /// @param PropertyName Name of the Objective C property assoicated with
+    ///                     this ivar.
+    /// @param GetterName   Name of the Objective C property getter selector.
+    /// @param SetterName   Name of the Objective C property setter selector.
+    /// @param PropertyAttributes Objective C property attributes.
+    DIType createObjCIVar(StringRef Name, DIFile File,
+                          unsigned LineNo, uint64_t SizeInBits, 
+                          uint64_t AlignInBits, uint64_t OffsetInBits, 
+                          unsigned Flags, DIType Ty,
+                          StringRef PropertyName = StringRef(),
+                          StringRef PropertyGetterName = StringRef(),
+                          StringRef PropertySetterName = StringRef(),
+                          unsigned PropertyAttributes = 0);
+
     /// createClassType - Create debugging information entry for a class.
     /// @param Scope        Scope in which this class is defined.
     /// @param Name         class name.
@@ -278,7 +303,7 @@ namespace llvm {
     DIDescriptor createUnspecifiedParameter();
 
     /// getOrCreateArray - Get a DIArray, create one if required.
-    DIArray getOrCreateArray(Value *const *Elements, unsigned NumElements);
+    DIArray getOrCreateArray(ArrayRef<Value *> Elements);
 
     /// getOrCreateSubrange - Create a descriptor for a value range.  This
     /// implicitly uniques the values returned.
@@ -326,11 +351,14 @@ namespace llvm {
     /// @param AlwaysPreserve Boolean. Set to true if debug info for this
     ///                       variable should be preserved in optimized build.
     /// @param Flags          Flags, e.g. artificial variable.
+    /// @param ArgNo       If this variable is an arugment then this argument's
+    ///                    number. 1 indicates 1st argument.
     DIVariable createLocalVariable(unsigned Tag, DIDescriptor Scope,
                                    StringRef Name,
                                    DIFile File, unsigned LineNo,
                                    DIType Ty, bool AlwaysPreserve = false,
-                                   unsigned Flags = 0);
+                                   unsigned Flags = 0,
+                                   unsigned ArgNo = 0);
 
 
     /// createComplexVariable - Create a new descriptor for the specified
@@ -342,12 +370,13 @@ namespace llvm {
     /// @param File        File where this variable is defined.
     /// @param LineNo      Line number.
     /// @param Ty          Variable Type
-    /// @param Addr        A pointer to a vector of complex address operations.
-    /// @param NumAddr     Num of address operations in the vector.
+    /// @param Addr        An array of complex address operations.
+    /// @param ArgNo       If this variable is an arugment then this argument's
+    ///                    number. 1 indicates 1st argument.
     DIVariable createComplexVariable(unsigned Tag, DIDescriptor Scope,
                                      StringRef Name, DIFile F, unsigned LineNo,
-                                     DIType Ty, Value *const *Addr,
-                                     unsigned NumAddr);
+                                     DIType Ty, ArrayRef<Value *> Addr,
+                                     unsigned ArgNo = 0);
 
     /// createFunction - Create a new descriptor for the specified subprogram.
     /// See comments in DISubprogram for descriptions of these fields.
@@ -363,6 +392,7 @@ namespace llvm {
     ///                      This flags are used to emit dwarf attributes.
     /// @param isOptimized   True if optimization is ON.
     /// @param Fn            llvm::Function pointer.
+    /// @param TParam        Function template parameters.
     DISubprogram createFunction(DIDescriptor Scope, StringRef Name,
                                 StringRef LinkageName,
                                 DIFile File, unsigned LineNo,
@@ -370,7 +400,9 @@ namespace llvm {
                                 bool isDefinition,
                                 unsigned Flags = 0,
                                 bool isOptimized = false,
-                                Function *Fn = 0);
+                                Function *Fn = 0,
+                                MDNode *TParam = 0,
+                                MDNode *Decl = 0);
 
     /// createMethod - Create a new descriptor for the specified C++ method.
     /// See comments in DISubprogram for descriptions of these fields.
@@ -382,7 +414,7 @@ namespace llvm {
     /// @param Ty            Function type.
     /// @param isLocalToUnit True if this function is not externally visible..
     /// @param isDefinition  True if this is a function definition.
-    /// @param Virtuality    Attributes describing virutallness. e.g. pure 
+    /// @param Virtuality    Attributes describing virtualness. e.g. pure 
     ///                      virtual function.
     /// @param VTableIndex   Index no of this method in virtual table.
     /// @param VTableHolder  Type that holds vtable.
@@ -390,6 +422,7 @@ namespace llvm {
     ///                      This flags are used to emit dwarf attributes.
     /// @param isOptimized   True if optimization is ON.
     /// @param Fn            llvm::Function pointer.
+    /// @param TParam        Function template parameters.
     DISubprogram createMethod(DIDescriptor Scope, StringRef Name,
                               StringRef LinkageName,
                               DIFile File, unsigned LineNo,
@@ -399,7 +432,8 @@ namespace llvm {
                               MDNode *VTableHolder = 0,
                               unsigned Flags = 0,
                               bool isOptimized = false,
-                              Function *Fn = 0);
+                              Function *Fn = 0,
+                              MDNode *TParam = 0);
 
     /// createNameSpace - This creates new descriptor for a namespace
     /// with the specified parent scope.
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index aa69088..c6cc8f7 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -332,6 +332,32 @@ namespace llvm {
     /// return base type size.
     uint64_t getOriginalTypeSize() const;
 
+    StringRef getObjCPropertyName() const { return getStringField(10); }
+    StringRef getObjCPropertyGetterName() const {
+      return getStringField(11);
+    }
+    StringRef getObjCPropertySetterName() const {
+      return getStringField(12);
+    }
+    bool isReadOnlyObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0;
+    }
+    bool isReadWriteObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0;
+    }
+    bool isAssignObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0;
+    }
+    bool isRetainObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0;
+    }
+    bool isCopyObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0;
+    }
+    bool isNonAtomicObjCProperty() {
+      return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0;
+    }
+
     /// Verify - Verify that a derived type descriptor is well formed.
     bool Verify() const;
 
@@ -511,6 +537,10 @@ namespace llvm {
     bool describes(const Function *F);
 
     Function *getFunction() const { return getFunctionField(16); }
+    DIArray getTemplateParams() const { return getFieldAs<DIArray>(17); }
+    DISubprogram getFunctionDeclaration() const {
+      return getFieldAs<DISubprogram>(18);
+    }
   };
 
   /// DIGlobalVariable - This is a wrapper for a global variable.
@@ -564,7 +594,13 @@ namespace llvm {
       DIFile F = getFieldAs<DIFile>(3); 
       return F.getCompileUnit();
     }
-    unsigned getLineNumber() const      { return getUnsignedField(4); }
+    unsigned getLineNumber() const      { 
+      return (getUnsignedField(4) << 8) >> 8; 
+    }
+    unsigned getArgNumber() const       {
+      unsigned L = getUnsignedField(4); 
+      return L >> 24;
+    }
     DIType getType() const              { return getFieldAs<DIType>(5); }
     
     /// isArtificial - Return true if this variable is marked as "artificial".
@@ -586,7 +622,9 @@ namespace llvm {
     unsigned getNumAddrElements() const;
     
     uint64_t getAddrElement(unsigned Idx) const {
-      return getUInt64Field(Idx+6);
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return getUInt64Field(Idx+6);
+      return getUInt64Field(Idx+7);
     }
 
     /// isBlockByrefVariable - Return true if the variable was declared as
@@ -660,214 +698,6 @@ namespace llvm {
     bool Verify() const;
   };
 
-  /// DIFactory - This object assists with the construction of the various
-  /// descriptors.
-  class DIFactory {
-    Module &M;
-    LLVMContext& VMContext;
-
-    Function *DeclareFn;     // llvm.dbg.declare
-    Function *ValueFn;       // llvm.dbg.value
-
-    DIFactory(const DIFactory &);     // DO NOT IMPLEMENT
-    void operator=(const DIFactory&); // DO NOT IMPLEMENT
-  public:
-    enum ComplexAddrKind { OpPlus=1, OpDeref };
-
-    explicit DIFactory(Module &m);
-
-    /// GetOrCreateArray - Create an descriptor for an array of descriptors.
-    /// This implicitly uniques the arrays created.
-    DIArray GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys);
-
-    /// GetOrCreateSubrange - Create a descriptor for a value range.  This
-    /// implicitly uniques the values returned.
-    DISubrange GetOrCreateSubrange(int64_t Lo, int64_t Hi);
-
-    /// CreateUnspecifiedParameter - Create unspeicified type descriptor
-    /// for a subroutine type.
-    DIDescriptor CreateUnspecifiedParameter();
-
-    /// CreateCompileUnit - Create a new descriptor for the specified compile
-    /// unit.
-    DICompileUnit CreateCompileUnit(unsigned LangID,
-                                    StringRef Filename,
-                                    StringRef Directory,
-                                    StringRef Producer,
-                                    bool isMain = false,
-                                    bool isOptimized = false,
-                                    StringRef Flags = "",
-                                    unsigned RunTimeVer = 0);
-
-    /// CreateFile -  Create a new descriptor for the specified file.
-    DIFile CreateFile(StringRef Filename, StringRef Directory,
-                      DICompileUnit CU);
-
-    /// CreateEnumerator - Create a single enumerator value.
-    DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val);
-
-    /// CreateBasicType - Create a basic type like int, float, etc.
-    DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name,
-                                DIFile F, unsigned LineNumber,
-                                uint64_t SizeInBits, uint64_t AlignInBits,
-                                uint64_t OffsetInBits, unsigned Flags,
-                                unsigned Encoding);
-
-    /// CreateBasicType - Create a basic type like int, float, etc.
-    DIBasicType CreateBasicTypeEx(DIDescriptor Context, StringRef Name,
-                                DIFile F, unsigned LineNumber,
-                                Constant *SizeInBits, Constant *AlignInBits,
-                                Constant *OffsetInBits, unsigned Flags,
-                                unsigned Encoding);
-
-    /// CreateDerivedType - Create a derived type like const qualified type,
-    /// pointer, typedef, etc.
-    DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context,
-                                    StringRef Name,
-                                    DIFile F,
-                                    unsigned LineNumber,
-                                    uint64_t SizeInBits, uint64_t AlignInBits,
-                                    uint64_t OffsetInBits, unsigned Flags,
-                                    DIType DerivedFrom);
-
-    /// CreateDerivedType - Create a derived type like const qualified type,
-    /// pointer, typedef, etc.
-    DIDerivedType CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context,
-                                      StringRef Name,
-                                      DIFile F,
-                                      unsigned LineNumber,
-                                      Constant *SizeInBits, 
-                                      Constant *AlignInBits,
-                                      Constant *OffsetInBits, unsigned Flags,
-                                      DIType DerivedFrom);
-
-    /// CreateCompositeType - Create a composite type like array, struct, etc.
-    DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context,
-                                        StringRef Name,
-                                        DIFile F,
-                                        unsigned LineNumber,
-                                        uint64_t SizeInBits,
-                                        uint64_t AlignInBits,
-                                        uint64_t OffsetInBits, unsigned Flags,
-                                        DIType DerivedFrom,
-                                        DIArray Elements,
-                                        unsigned RunTimeLang = 0,
-                                        MDNode *ContainingType = 0);
-
-    /// CreateTemporaryType - Create a temporary forward-declared type.
-    DIType CreateTemporaryType();
-    DIType CreateTemporaryType(DIFile F);
-
-    /// CreateArtificialType - Create a new DIType with "artificial" flag set.
-    DIType CreateArtificialType(DIType Ty);
-
-    /// CreateCompositeType - Create a composite type like array, struct, etc.
-    DICompositeType CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context,
-                                          StringRef Name,
-                                          DIFile F,
-                                          unsigned LineNumber,
-                                          Constant *SizeInBits,
-                                          Constant *AlignInBits,
-                                          Constant *OffsetInBits, 
-                                          unsigned Flags,
-                                          DIType DerivedFrom,
-                                          DIArray Elements,
-                                          unsigned RunTimeLang = 0,
-                                          MDNode *ContainingType = 0);
-
-    /// CreateSubprogram - Create a new descriptor for the specified subprogram.
-    /// See comments in DISubprogram for descriptions of these fields.
-    DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name,
-                                  StringRef DisplayName,
-                                  StringRef LinkageName,
-                                  DIFile F, unsigned LineNo,
-                                  DIType Ty, bool isLocalToUnit,
-                                  bool isDefinition,
-                                  unsigned VK = 0,
-                                  unsigned VIndex = 0,
-                                  DIType ContainingType = DIType(),
-                                  unsigned Flags = 0,
-                                  bool isOptimized = false,
-                                  Function *Fn = 0);
-
-    /// CreateSubprogramDefinition - Create new subprogram descriptor for the
-    /// given declaration. 
-    DISubprogram CreateSubprogramDefinition(DISubprogram &SPDeclaration);
-
-    /// CreateGlobalVariable - Create a new descriptor for the specified global.
-    DIGlobalVariable
-    CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                         StringRef DisplayName,
-                         StringRef LinkageName,
-                         DIFile F,
-                         unsigned LineNo, DIType Ty, bool isLocalToUnit,
-                         bool isDefinition, llvm::GlobalVariable *GV);
-
-    /// CreateGlobalVariable - Create a new descriptor for the specified constant.
-    DIGlobalVariable
-    CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                         StringRef DisplayName,
-                         StringRef LinkageName,
-                         DIFile F,
-                         unsigned LineNo, DIType Ty, bool isLocalToUnit,
-                         bool isDefinition, llvm::Constant *C);
-
-    /// CreateVariable - Create a new descriptor for the specified variable.
-    DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
-                              StringRef Name,
-                              DIFile F, unsigned LineNo,
-                              DIType Ty, bool AlwaysPreserve = false,
-                              unsigned Flags = 0);
-
-    /// CreateComplexVariable - Create a new descriptor for the specified
-    /// variable which has a complex address expression for its address.
-    DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                     StringRef Name, DIFile F, unsigned LineNo,
-                                     DIType Ty, Value *const *Addr,
-                                     unsigned NumAddr);
-
-    /// CreateLexicalBlock - This creates a descriptor for a lexical block
-    /// with the specified parent context.
-    DILexicalBlock CreateLexicalBlock(DIDescriptor Context, DIFile F,
-                                      unsigned Line = 0, unsigned Col = 0);
-
-    /// CreateNameSpace - This creates new descriptor for a namespace
-    /// with the specified parent context.
-    DINameSpace CreateNameSpace(DIDescriptor Context, StringRef Name,
-                                DIFile F, unsigned LineNo);
-
-    /// CreateLocation - Creates a debug info location.
-    DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo,
-                              DIScope S, DILocation OrigLoc);
-
-    /// CreateLocation - Creates a debug info location.
-    DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo,
-                              DIScope S, MDNode *OrigLoc = 0);
-
-    /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-    Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D,
-                               BasicBlock *InsertAtEnd);
-
-    /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-    Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D,
-                               Instruction *InsertBefore);
-
-    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-    Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset,
-                                         DIVariable D, BasicBlock *InsertAtEnd);
-
-    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-    Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset,
-                                       DIVariable D, Instruction *InsertBefore);
-
-    // RecordType - Record DIType in a module such that it is not lost even if
-    // it is not referenced through debug info anchors.
-    void RecordType(DIType T);
-
-  private:
-    Constant *GetTagConstant(unsigned TAG);
-  };
-
   /// getDISubprogram - Find subprogram that is enclosing this scope.
   DISubprogram getDISubprogram(const MDNode *Scope);
 
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 578e6ab..e56d24d 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -28,6 +28,7 @@ class IVUsers;
 class ScalarEvolution;
 class SCEV;
 class IVUsers;
+class TargetData;
 
 /// IVStrideUse - Keep track of one use of a strided induction variable.
 /// The Expr member keeps track of the expression, User is the actual user
@@ -122,6 +123,7 @@ class IVUsers : public LoopPass {
   LoopInfo *LI;
   DominatorTree *DT;
   ScalarEvolution *SE;
+  TargetData *TD;
   SmallPtrSet<Instruction*,16> Processed;
 
   /// IVUses - A list of all tracked IV uses of induction variable expressions
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index b08bf57..a0cce51 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -43,7 +43,7 @@ namespace llvm {
   /// InlineCost - Represent the cost of inlining a function. This
   /// supports special values for functions which should "always" or
   /// "never" be inlined. Otherwise, the cost represents a unitless
-  /// amount; smaller values increase the likelyhood of the function
+  /// amount; smaller values increase the likelihood of the function
   /// being inlined.
   class InlineCost {
     enum Kind {
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index dff1ba2..bc6e55f 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -55,6 +55,21 @@ namespace llvm {
   Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
                           const DominatorTree *DT = 0);
 
+  /// SimplifySRemInst - Given operands for an SRem, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyURemInst - Given operands for a URem, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyURemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyFRemInst - Given operands for an FRem, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyFRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
   /// SimplifyShlInst - Given operands for a Shl, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
index eb65d22..7c88b13 100644
--- a/include/llvm/Analysis/Lint.h
+++ b/include/llvm/Analysis/Lint.h
@@ -20,8 +20,6 @@
 #ifndef LLVM_ANALYSIS_LINT_H
 #define LLVM_ANALYSIS_LINT_H
 
-#include <string>
-
 namespace llvm {
 
 class FunctionPass;
diff --git a/include/llvm/Analysis/LiveValues.h b/include/llvm/Analysis/LiveValues.h
deleted file mode 100644
index b92cb78..0000000
--- a/include/llvm/Analysis/LiveValues.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- LiveValues.h - Liveness information for LLVM IR Values. ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the LLVM IR Value liveness
-// analysis pass.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_LIVEVALUES_H
-#define LLVM_ANALYSIS_LIVEVALUES_H
-
-#include "llvm/Pass.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-
-namespace llvm {
-
-class DominatorTree;
-class LoopInfo;
-class Value;
-
-/// LiveValues - Analysis that provides liveness information for
-/// LLVM IR Values.
-///
-class LiveValues : public FunctionPass {
-  DominatorTree *DT;
-  LoopInfo *LI;
-
-  /// Memo - A bunch of state to be associated with a value.
-  ///
-  struct Memo {
-    /// Used - The set of blocks which contain a use of the value.
-    ///
-    SmallPtrSet<const BasicBlock *, 4> Used;
-
-    /// LiveThrough - A conservative approximation of the set of blocks in
-    /// which the value is live-through, meaning blocks properly dominated
-    /// by the definition, and from which blocks containing uses of the
-    /// value are reachable.
-    ///
-    SmallPtrSet<const BasicBlock *, 4> LiveThrough;
-
-    /// Killed - A conservative approximation of the set of blocks in which
-    /// the value is used and not live-out.
-    ///
-    SmallPtrSet<const BasicBlock *, 4> Killed;
-  };
-
-  /// Memos - Remembers the Memo for each Value. This is populated on
-  /// demand.
-  ///
-  DenseMap<const Value *, Memo> Memos;
-
-  /// getMemo - Retrieve an existing Memo for the given value if one
-  /// is available, otherwise compute a new one.
-  ///
-  Memo &getMemo(const Value *V);
-
-  /// compute - Compute a new Memo for the given value.
-  ///
-  Memo &compute(const Value *V);
-
-public:
-  static char ID;
-  LiveValues();
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-  virtual bool runOnFunction(Function &F);
-  virtual void releaseMemory();
-
-  /// isUsedInBlock - Test if the given value is used in the given block.
-  ///
-  bool isUsedInBlock(const Value *V, const BasicBlock *BB);
-
-  /// isLiveThroughBlock - Test if the given value is known to be
-  /// live-through the given block, meaning that the block is properly
-  /// dominated by the value's definition, and there exists a block
-  /// reachable from it that contains a use. This uses a conservative
-  /// approximation that errs on the side of returning false.
-  ///
-  bool isLiveThroughBlock(const Value *V, const BasicBlock *BB);
-
-  /// isKilledInBlock - Test if the given value is known to be killed in
-  /// the given block, meaning that the block contains a use of the value,
-  /// and no blocks reachable from the block contain a use. This uses a
-  /// conservative approximation that errs on the side of returning false.
-  ///
-  bool isKilledInBlock(const Value *V, const BasicBlock *BB);
-};
-
-}  // end namespace llvm
-
-#endif
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 4d5dd19..b56fe08 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -48,6 +48,11 @@ namespace llvm {
       /// this occurs when we see a may-aliased store to the memory location we
       /// care about.
       ///
+      /// There are several cases that may be interesting here:
+      ///   1. Loads are clobbered by may-alias stores.
+      ///   2. Loads are considered clobbered by partially-aliased loads.  The
+      ///      client may choose to analyze deeper into these cases.
+      ///
       /// A dependence query on the first instruction of the entry block will
       /// return a clobber(self) result.
       Clobber,
@@ -350,6 +355,20 @@ namespace llvm {
                                           BasicBlock::iterator ScanIt,
                                           BasicBlock *BB);
     
+    
+    /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+    /// looks at a memory location for a load (specified by MemLocBase, Offs,
+    /// and Size) and compares it against a load.  If the specified load could
+    /// be safely widened to a larger integer load that is 1) still efficient,
+    /// 2) safe for the target, and 3) would provide the specified memory
+    /// location value, then this function returns the size in bytes of the
+    /// load width to use.  If not, this returns zero.
+    static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
+                                                    int64_t MemLocOffs,
+                                                    unsigned MemLocSize,
+                                                    const LoadInst *LI,
+                                                    const TargetData &TD);
+    
   private:
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 5b0c5b1..0eff75f 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -159,12 +159,6 @@ namespace llvm {
 
   //===--------------------------------------------------------------------===//
   //
-  // createLiveValuesPass - This creates an instance of the LiveValues pass.
-  //
-  FunctionPass *createLiveValuesPass();
-
-  //===--------------------------------------------------------------------===//
-  //
   /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo
   /// pass.
   FunctionPass *createLazyValueInfoPass();
diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h
index 263763f..cef6d2d 100644
--- a/include/llvm/Analysis/PathProfileInfo.h
+++ b/include/llvm/Analysis/PathProfileInfo.h
@@ -16,7 +16,6 @@
 
 #include "llvm/BasicBlock.h"
 #include "llvm/Analysis/PathNumbering.h"
-#include <stack>
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 2cd6ae3..0eddb91 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_ANALYSIS_POST_DOMINATORS_H
 #define LLVM_ANALYSIS_POST_DOMINATORS_H
 
-#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/Dominators.h"
 
 namespace llvm {
 
@@ -101,37 +101,6 @@ template <> struct GraphTraits<PostDominatorTree*>
   }
 };
 
-/// PostDominanceFrontier Class - Concrete subclass of DominanceFrontier that is
-/// used to compute the a post-dominance frontier.
-///
-struct PostDominanceFrontier : public DominanceFrontierBase {
-  static char ID;
-  PostDominanceFrontier()
-    : DominanceFrontierBase(ID, true) {
-      initializePostDominanceFrontierPass(*PassRegistry::getPassRegistry());
-    }
-
-  virtual bool runOnFunction(Function &) {
-    Frontiers.clear();
-    PostDominatorTree &DT = getAnalysis<PostDominatorTree>();
-    Roots = DT.getRoots();
-    if (const DomTreeNode *Root = DT.getRootNode())
-      calculate(DT, Root);
-    return false;
-  }
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesAll();
-    AU.addRequired<PostDominatorTree>();
-  }
-
-private:
-  const DomSetType &calculate(const PostDominatorTree &DT,
-                              const DomTreeNode *Node);
-};
-
-FunctionPass* createPostDomFrontier();
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index a36ca11..9d89545 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -28,9 +28,10 @@
 #define LLVM_ANALYSIS_REGION_INFO_H
 
 #include "llvm/ADT/PointerIntPair.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominanceFrontier.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Support/Allocator.h"
+#include <map>
 
 namespace llvm {
 
@@ -145,7 +146,7 @@ inline Region* RegionNode::getNodeAs<Region>() const {
 /// two connections to the remaining graph. It can be used to analyze or
 /// optimize parts of the control flow graph.
 ///
-/// A <em> simple Region </em> is connected to the remaing graph by just two
+/// A <em> simple Region </em> is connected to the remaining graph by just two
 /// edges. One edge entering the Region and another one leaving the Region.
 ///
 /// An <em> extended Region </em> (or just Region) is a subgraph that can be
@@ -335,12 +336,16 @@ public:
     return RI;
   }
 
+  /// PrintStyle - Print region in difference ways.
+  enum PrintStyle { PrintNone, PrintBB, PrintRN  };
+
   /// @brief Print the region.
   ///
   /// @param OS The output stream the Region is printed to.
   /// @param printTree Print also the tree of subregions.
   /// @param level The indentation level used for printing.
-  void print(raw_ostream& OS, bool printTree = true, unsigned level = 0) const;
+  void print(raw_ostream& OS, bool printTree = true, unsigned level = 0,
+             enum PrintStyle Style = PrintNone) const;
 
   /// @brief Print the region to stderr.
   void dump() const;
@@ -438,7 +443,7 @@ public:
 
   /// @brief Move all direct child nodes of this Region to another Region.
   ///
-  /// @param To The Region the child nodes will be transfered to.
+  /// @param To The Region the child nodes will be transferred to.
   void transferChildrenTo(Region *To);
 
   /// @brief Verify if the region is a correct region.
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index ced5b52..7adc71c 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 //===----------------------------------------------------------------------===//
-/// @brief Hierachical RegionNode successor iterator.
+/// @brief Hierarchical RegionNode successor iterator.
 ///
 /// This iterator iterates over all successors of a RegionNode.
 ///
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index aedc06a..5403e09 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -54,7 +54,7 @@ public:
   /// @brief Get a pass to print the LLVM IR in the region.
   ///
   /// @param O      The ouput stream to print the Region.
-  /// @param Banner The banner to seperate different printed passes.
+  /// @param Banner The banner to separate different printed passes.
   ///
   /// @return The pass to print the LLVM IR in the region.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index d193806..a62f6a8 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -24,6 +24,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Instructions.h"
 #include "llvm/Function.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Allocator.h"
@@ -72,6 +73,29 @@ namespace llvm {
     void operator=(const SCEV &);  // DO NOT IMPLEMENT
 
   public:
+    /// NoWrapFlags are bitfield indices into SubclassData.
+    ///
+    /// Add and Mul expressions may have no-unsigned-wrap <NUW> or
+    /// no-signed-wrap <NSW> properties, which are derived from the IR
+    /// operator. NSW is a misnomer that we use to mean no signed overflow or
+    /// underflow.
+    ///
+    /// AddRec expression may have a no-self-wraparound <NW> property if the
+    /// result can never reach the start value. This property is independent of
+    /// the actual start value and step direction. Self-wraparound is defined
+    /// purely in terms of the recurrence's loop, step size, and
+    /// bitwidth. Formally, a recurrence with no self-wraparound satisfies:
+    /// abs(step) * max-iteration(loop) <= unsigned-max(bitwidth).
+    ///
+    /// Note that NUW and NSW are also valid properties of a recurrence, and
+    /// either implies NW. For convenience, NW will be set for a recurrence
+    /// whenever either NUW or NSW are set.
+    enum NoWrapFlags { FlagAnyWrap = 0,          // No guarantee.
+                       FlagNW      = (1 << 0),   // No self-wrap.
+                       FlagNUW     = (1 << 1),   // No unsigned wrap.
+                       FlagNSW     = (1 << 2),   // No signed wrap.
+                       NoWrapMask  = (1 << 3) -1 };
+
     explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
       FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
 
@@ -159,6 +183,20 @@ namespace llvm {
       ProperlyDominatesBlock ///< The SCEV properly dominates the block.
     };
 
+    /// Convenient NoWrapFlags manipulation that hides enum casts and is
+    /// visible in the ScalarEvolution name space.
+    static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask) {
+      return (SCEV::NoWrapFlags)(Flags & Mask);
+    }
+    static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags,
+                                      SCEV::NoWrapFlags OnFlags) {
+      return (SCEV::NoWrapFlags)(Flags | OnFlags);
+    }
+    static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags,
+                                        SCEV::NoWrapFlags OffFlags) {
+      return (SCEV::NoWrapFlags)(Flags & ~OffFlags);
+    }
+
   private:
     /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
     /// notified whenever a Value is deleted.
@@ -465,44 +503,41 @@ namespace llvm {
     const SCEV *getSignExtendExpr(const SCEV *Op, const Type *Ty);
     const SCEV *getAnyExtendExpr(const SCEV *Op, const Type *Ty);
     const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                           bool HasNUW = false, bool HasNSW = false);
+                           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
     const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
-                           bool HasNUW = false, bool HasNSW = false) {
+                           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
       SmallVector<const SCEV *, 2> Ops;
       Ops.push_back(LHS);
       Ops.push_back(RHS);
-      return getAddExpr(Ops, HasNUW, HasNSW);
+      return getAddExpr(Ops, Flags);
     }
-    const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1,
-                           const SCEV *Op2,
-                           bool HasNUW = false, bool HasNSW = false) {
+    const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
+                           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
       SmallVector<const SCEV *, 3> Ops;
       Ops.push_back(Op0);
       Ops.push_back(Op1);
       Ops.push_back(Op2);
-      return getAddExpr(Ops, HasNUW, HasNSW);
+      return getAddExpr(Ops, Flags);
     }
     const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
-                           bool HasNUW = false, bool HasNSW = false);
+                           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
     const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
-                           bool HasNUW = false, bool HasNSW = false) {
+                           SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap)
+    {
       SmallVector<const SCEV *, 2> Ops;
       Ops.push_back(LHS);
       Ops.push_back(RHS);
-      return getMulExpr(Ops, HasNUW, HasNSW);
+      return getMulExpr(Ops, Flags);
     }
     const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
     const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step,
-                              const Loop *L,
-                              bool HasNUW = false, bool HasNSW = false);
+                              const Loop *L, SCEV::NoWrapFlags Flags);
     const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
-                              const Loop *L,
-                              bool HasNUW = false, bool HasNSW = false);
+                              const Loop *L, SCEV::NoWrapFlags Flags);
     const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
-                              const Loop *L,
-                              bool HasNUW = false, bool HasNSW = false) {
+                              const Loop *L, SCEV::NoWrapFlags Flags) {
       SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
-      return getAddRecExpr(NewOp, L, HasNUW, HasNSW);
+      return getAddRecExpr(NewOp, L, Flags);
     }
     const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
     const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
@@ -537,11 +572,9 @@ namespace llvm {
     ///
     const SCEV *getNotSCEV(const SCEV *V);
 
-    /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1,
-    /// and thus the HasNUW and HasNSW bits apply to the resultant add, not
-    /// whether the sub would have overflowed.
+    /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
     const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
-                             bool HasNUW = false, bool HasNSW = false);
+                             SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
 
     /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
     /// of the input value to the specified type.  If the type must be
@@ -586,6 +619,12 @@ namespace llvm {
     const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
                                            const SCEV *RHS);
 
+    /// getPointerBase - Transitively follow the chain of pointer-type operands
+    /// until reaching a SCEV that does not have a single pointer operand. This
+    /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+    /// but corner cases do exist.
+    const SCEV *getPointerBase(const SCEV *V);
+
     /// getSCEVAtScope - Return a SCEV expression for the specified value
     /// at the specified scope in the program.  The L value specifies a loop
     /// nest to evaluate the expression at, where null is the top-level or a
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index db432c8..856d92c 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -160,13 +160,8 @@ namespace llvm {
 
     const Type *getType() const { return getOperand(0)->getType(); }
 
-    bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); }
-    void setHasNoUnsignedWrap(bool B) {
-      SubclassData = (SubclassData & ~(1 << 0)) | (B << 0);
-    }
-    bool hasNoSignedWrap() const { return SubclassData & (1 << 1); }
-    void setHasNoSignedWrap(bool B) {
-      SubclassData = (SubclassData & ~(1 << 1)) | (B << 1);
+    NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const {
+      return (NoWrapFlags)(SubclassData & Mask);
     }
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -199,6 +194,11 @@ namespace llvm {
              S->getSCEVType() == scSMaxExpr ||
              S->getSCEVType() == scUMaxExpr;
     }
+
+    /// Set flags for a non-recurrence without clearing previously set flags.
+    void setNoWrapFlags(NoWrapFlags Flags) {
+      SubclassData |= Flags;
+    }
   };
 
 
@@ -305,11 +305,12 @@ namespace llvm {
     /// getStepRecurrence - This method constructs and returns the recurrence
     /// indicating how much this expression steps by.  If this is a polynomial
     /// of degree N, it returns a chrec of degree N-1.
+    /// We cannot determine whether the step recurrence has self-wraparound.
     const SCEV *getStepRecurrence(ScalarEvolution &SE) const {
       if (isAffine()) return getOperand(1);
       return SE.getAddRecExpr(SmallVector<const SCEV *, 3>(op_begin()+1,
                                                            op_end()),
-                              getLoop());
+                              getLoop(), FlagAnyWrap);
     }
 
     /// isAffine - Return true if this is an affine AddRec (i.e., it represents
@@ -327,6 +328,15 @@ namespace llvm {
       return getNumOperands() == 3;
     }
 
+    /// Set flags for a recurrence without clearing any previously set flags.
+    /// For AddRec, either NUW or NSW implies NW. Keep track of this fact here
+    /// to make it easier to propagate flags.
+    void setNoWrapFlags(NoWrapFlags Flags) {
+      if (Flags & (FlagNUW | FlagNSW))
+        Flags = ScalarEvolution::setFlags(Flags, FlagNW);
+      SubclassData |= Flags;
+    }
+
     /// evaluateAtIteration - Return the value of this chain of recurrences at
     /// the specified iteration number.
     const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
@@ -364,8 +374,7 @@ namespace llvm {
                  const SCEV *const *O, size_t N)
       : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
       // Max never overflows.
-      setHasNoUnsignedWrap(true);
-      setHasNoSignedWrap(true);
+      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
     }
 
   public:
@@ -387,8 +396,7 @@ namespace llvm {
                  const SCEV *const *O, size_t N)
       : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
       // Max never overflows.
-      setHasNoUnsignedWrap(true);
-      setHasNoSignedWrap(true);
+      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
     }
 
   public:
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index c3c07d8..f89a86c 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -25,7 +25,6 @@
 
 namespace llvm {
   class MemoryBuffer;
-  class raw_ostream;
 
 // Forward declare classes
 class Module;              // From VMCore
@@ -436,7 +435,7 @@ class Archive {
     /// to determine just enough information to create an ArchiveMember object
     /// which is then inserted into the Archive object's ilist at the location
     /// given by \p where.
-    /// @returns true if an error occured, false otherwise
+    /// @returns true if an error occurred, false otherwise
     /// @brief Add a file to the archive.
     bool addFileBefore(
       const sys::Path& filename, ///< The file to be added
@@ -483,7 +482,7 @@ class Archive {
     bool loadSymbolTable(std::string* ErrMessage);
 
     /// @brief Write the symbol table to an ofstream.
-    void writeSymbolTable(raw_ostream& ARFile);
+    void writeSymbolTable(std::ofstream& ARFile);
 
     /// Writes one ArchiveMember to an ofstream. If an error occurs, returns
     /// false, otherwise true. If an error occurs and error is non-null then
@@ -492,7 +491,7 @@ class Archive {
     /// @returns true Writing member failed, \p error set to error message
     bool writeMember(
       const ArchiveMember& member, ///< The member to be written
-      raw_ostream& ARFile,       ///< The file to write member onto
+      std::ofstream& ARFile,       ///< The file to write member onto
       bool CreateSymbolTable,      ///< Should symbol table be created?
       bool TruncateNames,          ///< Should names be truncated to 11 chars?
       bool ShouldCompress,         ///< Should the member be compressed?
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index a071feb..58395ba 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -183,6 +183,10 @@ namespace llvm {
     /// function.
     void EmitFunctionBody();
 
+    void emitPrologLabel(const MachineInstr &MI);
+
+    bool needsCFIMoves();
+
     /// EmitConstantPool - Print to the current output stream assembly
     /// representations of the constants in the constant pool MCP. This is
     /// used to print out constants which have been "spilled to memory" by
@@ -377,10 +381,17 @@ namespace llvm {
     /// operands.
     virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
 
+    /// getDwarfRegOpSize - get size required to emit given machine location
+    /// using dwarf encoding.
+    virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const;
+
     /// getISAEncoding - Get the value for DW_AT_APPLE_isa. Zero if no isa
     /// encoding specified.
     virtual unsigned getISAEncoding() { return 0; }
 
+    /// EmitDwarfRegOp - Emit dwarf register operation.
+    virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
     //===------------------------------------------------------------------===//
     // Dwarf Lowering Routines
     //===------------------------------------------------------------------===//
@@ -389,6 +400,7 @@ namespace llvm {
     /// frame.
     void EmitFrameMoves(const std::vector<MachineMove> &Moves,
                         MCSymbol *BaseLabel, bool isEH) const;
+    void EmitCFIFrameMove(const MachineMove &Move) const;
     void EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const;
 
     //===------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 853ebf9..60edcc5 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -11,7 +11,7 @@
 #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
 #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
 
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/ADT/DenseMap.h"
 
 namespace llvm {
@@ -29,28 +29,25 @@ namespace llvm {
   /// @param Size       Size of live interval as returnexd by getSize()
   ///
   static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size) {
-    // The magic constant 200 corresponds to approx. 25 instructions since
-    // SlotIndexes allocate 8 slots per instruction.
-    //
-    // The constant is added to avoid depending too much on accidental SlotIndex
-    // gaps for small intervals. The effect is that small intervals have a spill
-    // weight that is mostly proportional to the number of uses, while large
-    // intervals get a spill weight that is closer to a use density.
-    //
-    return UseDefFreq / (Size + 200);
+    // The constant 25 instructions is added to avoid depending too much on
+    // accidental SlotIndex gaps for small intervals. The effect is that small
+    // intervals have a spill weight that is mostly proportional to the number
+    // of uses, while large intervals get a spill weight that is closer to a use
+    // density.
+    return UseDefFreq / (Size + 25*SlotIndex::InstrDist);
   }
 
   /// VirtRegAuxInfo - Calculate auxiliary information for a virtual
   /// register such as its spill weight and allocation hint.
   class VirtRegAuxInfo {
-    MachineFunction &mf_;
-    LiveIntervals &lis_;
-    const MachineLoopInfo &loops_;
-    DenseMap<unsigned, float> hint_;
+    MachineFunction &MF;
+    LiveIntervals &LIS;
+    const MachineLoopInfo &Loops;
+    DenseMap<unsigned, float> Hint;
   public:
     VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
                    const MachineLoopInfo &loops) :
-      mf_(mf), lis_(lis), loops_(loops) {}
+      MF(mf), LIS(lis), Loops(loops) {}
 
     /// CalculateRegClass - recompute the register class for reg from its uses.
     /// Since the register class can affect the allocation hint, this function
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 2a9bbdf..9018ea3 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -141,6 +141,8 @@ typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT,
                         MVT &LocVT, CCValAssign::LocInfo &LocInfo,
                         ISD::ArgFlagsTy &ArgFlags, CCState &State);
 
+typedef enum { Invalid, Prologue, Call } ParmContext;
+
 /// CCState - This class holds information needed while lowering arguments and
 /// return values.  It captures which registers are already assigned and which
 /// stack slots are used.  It provides accessors to allocate these values.
@@ -154,6 +156,9 @@ class CCState {
 
   unsigned StackOffset;
   SmallVector<uint32_t, 16> UsedRegs;
+  unsigned FirstByValReg;
+  bool FirstByValRegValid;
+  ParmContext CallOrPrologue;
 public:
   CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
           SmallVector<CCValAssign, 16> &locs, LLVMContext &C);
@@ -288,6 +293,16 @@ public:
                    MVT LocVT, CCValAssign::LocInfo LocInfo,
                    int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
 
+  // First GPR that carries part of a byval aggregate that's split
+  // between registers and memory.
+  unsigned getFirstByValReg() { return FirstByValRegValid ? FirstByValReg : 0; }
+  void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
+  void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
+  bool isFirstByValRegValid() { return FirstByValRegValid; }
+
+  ParmContext getCallOrPrologue() { return CallOrPrologue; }
+  void setCallOrPrologue(ParmContext pc) { CallOrPrologue = pc; }
+
 private:
   /// MarkAllocated - Mark a register and all of its aliases as allocated.
   void MarkAllocated(unsigned Reg);
diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h
index 2c5215a..8aab3c6 100644
--- a/include/llvm/CodeGen/EdgeBundles.h
+++ b/include/llvm/CodeGen/EdgeBundles.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_CODEGEN_EDGEBUNDLES_H
 #define LLVM_CODEGEN_EDGEBUNDLES_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntEqClasses.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
@@ -29,6 +30,9 @@ class EdgeBundles : public MachineFunctionPass {
   ///   2*BB->getNumber()+1 -> Outgoing bundle.
   IntEqClasses EC;
 
+  /// Blocks - Map each bundle to a list of basic block numbers.
+  SmallVector<SmallVector<unsigned, 8>, 4> Blocks;
+
 public:
   static char ID;
   EdgeBundles() : MachineFunctionPass(ID) {}
@@ -40,6 +44,9 @@ public:
   /// getNumBundles - Return the total number of bundles in the CFG.
   unsigned getNumBundles() const { return EC.getNumClasses(); }
 
+  /// getBlocks - Return an array of blocks that are connected to Bundle.
+  ArrayRef<unsigned> getBlocks(unsigned Bundle) { return Blocks[Bundle]; }
+
   /// getMachineFunction - Return the last machine function computed.
   const MachineFunction *getMachineFunction() const { return MF; }
 
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index fbb1200..10c4c33d 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines the FastISel class.
-//  
+//
 //===----------------------------------------------------------------------===//
-  
+
 #ifndef LLVM_CODEGEN_FASTISEL_H
 #define LLVM_CODEGEN_FASTISEL_H
 
@@ -108,7 +108,7 @@ public:
                              const LoadInst * /*LI*/) {
     return false;
   }
-  
+
   /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions
   /// into the current block.
   void recomputeInsertPt();
@@ -203,16 +203,7 @@ protected:
                         unsigned Opcode,
                         unsigned Op0, bool Op0IsKill,
                         uint64_t Imm, MVT ImmType);
-  
-  /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
-  /// to emit an instruction with an immediate operand using FastEmit_rf.
-  /// If that fails, it materializes the immediate into a register and try
-  /// FastEmit_rr instead.
-  unsigned FastEmit_rf_(MVT VT,
-                        unsigned Opcode,
-                        unsigned Op0, bool Op0IsKill,
-                        const ConstantFP *FPImm, MVT ImmType);
-  
+
   /// FastEmit_i - This method is called by target-independent code
   /// to request that an instruction with the given type, opcode, and
   /// immediate operand be emitted.
@@ -250,14 +241,22 @@ protected:
                            unsigned Op0, bool Op0IsKill,
                            unsigned Op1, bool Op1IsKill);
 
-  /// FastEmitInst_ri - Emit a MachineInstr with two register operands
-  /// and a result register in the given register class.
+  /// FastEmitInst_ri - Emit a MachineInstr with a register operand,
+  /// an immediate, and a result register in the given register class.
   ///
   unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill,
                            uint64_t Imm);
 
+  /// FastEmitInst_rii - Emit a MachineInstr with one register operand
+  /// and two immediate operands.
+  ///
+  unsigned FastEmitInst_rii(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC,
+                           unsigned Op0, bool Op0IsKill,
+                           uint64_t Imm1, uint64_t Imm2);
+
   /// FastEmitInst_rf - Emit a MachineInstr with two register operands
   /// and a result register in the given register class.
   ///
@@ -274,13 +273,18 @@ protected:
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill,
                             uint64_t Imm);
-  
+
   /// FastEmitInst_i - Emit a MachineInstr with a single immediate
   /// operand, and a result register in the given register class.
   unsigned FastEmitInst_i(unsigned MachineInstrOpcode,
                           const TargetRegisterClass *RC,
                           uint64_t Imm);
 
+  /// FastEmitInst_ii - Emit a MachineInstr with a two immediate operands.
+  unsigned FastEmitInst_ii(unsigned MachineInstrOpcode,
+                          const TargetRegisterClass *RC,
+                          uint64_t Imm1, uint64_t Imm2);
+
   /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg
   /// from a specified index of a superregister to a specified type.
   unsigned FastEmitInst_extractsubreg(MVT RetVT,
@@ -300,8 +304,8 @@ protected:
   unsigned UpdateValueMap(const Value* I, unsigned Reg);
 
   unsigned createResultReg(const TargetRegisterClass *RC);
-  
-  /// TargetMaterializeConstant - Emit a constant in a register using 
+
+  /// TargetMaterializeConstant - Emit a constant in a register using
   /// target-specific logic, such as constant pool loads.
   virtual unsigned TargetMaterializeConstant(const Constant* C) {
     return 0;
@@ -313,6 +317,10 @@ protected:
     return 0;
   }
 
+  virtual unsigned TargetMaterializeFloatZero(const ConstantFP* CF) {
+    return 0;
+  }
+
 private:
   bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
 
@@ -323,7 +331,7 @@ private:
   bool SelectCall(const User *I);
 
   bool SelectBitCast(const User *I);
-  
+
   bool SelectCast(const User *I, unsigned Opcode);
 
   /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index b41f30d..4421cc0 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -187,7 +187,12 @@ public:
   /// InvalidatePHILiveOutRegInfo - Invalidates a PHI's LiveOutInfo, to be
   /// called when a block is visited before all of its predecessors.
   void InvalidatePHILiveOutRegInfo(const PHINode *PN) {
-    unsigned Reg = ValueMap[PN];
+    // PHIs with no uses have no ValueMap entry.
+    DenseMap<const Value*, unsigned>::const_iterator It = ValueMap.find(PN);
+    if (It == ValueMap.end())
+      return;
+
+    unsigned Reg = It->second;
     LiveOutRegInfo.grow(Reg);
     LiveOutRegInfo[Reg].IsValid = false;
   }
@@ -209,8 +214,9 @@ private:
 void AddCatchInfo(const CallInst &I,
                   MachineModuleInfo *MMI, MachineBasicBlock *MBB);
 
-/// CopyCatchInfo - Copy catch information from DestBB to SrcBB.
-void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
+/// CopyCatchInfo - Copy catch information from SuccBB (or one of its
+/// successors) to LPad.
+void CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
                    MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 3da11c4..f0de936 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -219,7 +219,7 @@ namespace ISD {
     // RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
     // These nodes take two operands: the normal LHS and RHS to the add. They
     // produce two results: the normal result of the add, and a boolean that
-    // indicates if an overflow occured (*not* a flag, because it may be stored
+    // indicates if an overflow occurred (*not* a flag, because it may be stored
     // to memory, etc.).  If the type of the boolean is not i1 then the high
     // bits conform to getBooleanContents.
     // These nodes are generated from the llvm.[su]add.with.overflow intrinsics.
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index fea8523..88e22d6 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -23,8 +23,6 @@
 #include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/ADT/DenseMap.h"
 
-using namespace std;
-
 namespace llvm {
 
 class MachineBasicBlock;
@@ -38,7 +36,7 @@ class GlobalValue;
 class Function;
   
 /// JITCodeEmitter - This class defines two sorts of methods: those for
-/// emitting the actual bytes of machine code, and those for emitting auxillary
+/// emitting the actual bytes of machine code, and those for emitting auxiliary
 /// structures, such as jump tables, relocations, etc.
 ///
 /// Emission of machine code is complicated by the fact that we don't (in
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 88131fb..c5285ce 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -286,6 +286,11 @@ namespace llvm {
       return valnos[ValNo];
     }
 
+    /// containsValue - Returns true if VNI belongs to this interval.
+    bool containsValue(const VNInfo *VNI) const {
+      return VNI && VNI->id < getNumValNums() && VNI == getValNumInfo(VNI->id);
+    }
+
     /// getNextValue - Create a new value number and return it.  MIIdx specifies
     /// the instruction that defines the value number.
     VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
@@ -447,6 +452,11 @@ namespace llvm {
       addRangeFrom(LR, ranges.begin());
     }
 
+    /// extendInBlock - If this interval is live before UseIdx in the basic
+    /// block that starts at StartIdx, extend it to be live at UseIdx and return
+    /// the value. If there is no live range before UseIdx, return NULL.
+    VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx);
+
     /// join - Join two live intervals (this, and other) together.  This applies
     /// mappings to the value numbers in the LHS/RHS intervals as specified.  If
     /// the intervals are not joinable, this aborts.
@@ -543,8 +553,8 @@ namespace llvm {
   /// }
 
   class ConnectedVNInfoEqClasses {
-    LiveIntervals &lis_;
-    IntEqClasses eqClass_;
+    LiveIntervals &LIS;
+    IntEqClasses EqClass;
 
     // Note that values a and b are connected.
     void Connect(unsigned a, unsigned b);
@@ -552,7 +562,7 @@ namespace llvm {
     unsigned Renumber();
 
   public:
-    explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : lis_(lis) {}
+    explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {}
 
     /// Classify - Classify the values in LI into connected components.
     /// Return the number of connected components.
@@ -560,12 +570,13 @@ namespace llvm {
 
     /// getEqClass - Classify creates equivalence classes numbered 0..N. Return
     /// the equivalence class assigned the VNI.
-    unsigned getEqClass(const VNInfo *VNI) const { return eqClass_[VNI->id]; }
+    unsigned getEqClass(const VNInfo *VNI) const { return EqClass[VNI->id]; }
 
     /// Distribute - Distribute values in LIV[0] into a separate LiveInterval
     /// for each connected component. LIV must have a LiveInterval for each
     /// connected component. The LiveIntervals in Liv[1..] must be empty.
-    void Distribute(LiveInterval *LIV[]);
+    /// Instructions using LIV[0] are rewritten.
+    void Distribute(LiveInterval *LIV[], MachineRegisterInfo &MRI);
 
   };
 
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index b09f8d1..8ca58b8 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -159,7 +159,11 @@ namespace llvm {
     /// range to just the remaining uses. This method does not compute reaching
     /// defs for new uses, and it doesn't remove dead defs.
     /// Dead PHIDef values are marked as unused.
-    void shrinkToUses(LiveInterval *li);
+    /// New dead machine instructions are added to the dead vector.
+    /// Return true if the interval may have been separated into multiple
+    /// connected components.
+    bool shrinkToUses(LiveInterval *li,
+                      SmallVectorImpl<MachineInstr*> *dead = 0);
 
     // Interval removal
 
@@ -272,7 +276,7 @@ namespace llvm {
     /// (if any is created) by reference. This is temporary.
     std::vector<LiveInterval*>
     addIntervalsForSpills(const LiveInterval& i,
-                          const SmallVectorImpl<LiveInterval*> &SpillIs,
+                          const SmallVectorImpl<LiveInterval*> *SpillIs,
                           const MachineLoopInfo *loopInfo, VirtRegMap& vrm);
 
     /// spillPhysRegAroundRegDefsUses - Spill the specified physical register
@@ -285,7 +289,7 @@ namespace llvm {
     /// val# of the specified interval is re-materializable. Also returns true
     /// by reference if all of the defs are load instructions.
     bool isReMaterializable(const LiveInterval &li,
-                            const SmallVectorImpl<LiveInterval*> &SpillIs,
+                            const SmallVectorImpl<LiveInterval*> *SpillIs,
                             bool &isLoad);
 
     /// isReMaterializable - Returns true if the definition MI of the specified
@@ -372,7 +376,7 @@ namespace llvm {
     /// by reference if the def is a load.
     bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo,
                             MachineInstr *MI,
-                            const SmallVectorImpl<LiveInterval*> &SpillIs,
+                            const SmallVectorImpl<LiveInterval*> *SpillIs,
                             bool &isLoad);
 
     /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 1785451..ad12157 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -16,6 +16,7 @@
 
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/ADT/GraphTraits.h"
+#include <functional>
 
 namespace llvm {
 
@@ -304,10 +305,18 @@ public:
   /// it returns end()
   iterator getFirstTerminator();
 
+  const_iterator getFirstTerminator() const {
+    return const_cast<MachineBasicBlock*>(this)->getFirstTerminator();
+  }
+
   /// getLastNonDebugInstr - returns an iterator to the last non-debug
   /// instruction in the basic block, or end()
   iterator getLastNonDebugInstr();
 
+  const_iterator getLastNonDebugInstr() const {
+    return const_cast<MachineBasicBlock*>(this)->getLastNonDebugInstr();
+  }
+
   /// SplitCriticalEdge - Split the critical edge from this block to the
   /// given successor block, and return the newly created block, or null
   /// if splitting is not possible.
@@ -411,6 +420,14 @@ raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
 
 void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t);
 
+// This is useful when building IndexedMaps keyed on basic block pointers.
+struct MBB2NumberFunctor :
+  public std::unary_function<const MachineBasicBlock*, unsigned> {
+  unsigned operator()(const MachineBasicBlock *MBB) const {
+    return MBB->getNumber();
+  }
+};
+
 //===--------------------------------------------------------------------===//
 // GraphTraits specializations for machine basic block graphs (machine-CFGs)
 //===--------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 8fc80ad..428aada 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -34,7 +34,7 @@ class Function;
 class MCSymbol;
 
 /// MachineCodeEmitter - This class defines two sorts of methods: those for
-/// emitting the actual bytes of machine code, and those for emitting auxillary
+/// emitting the actual bytes of machine code, and those for emitting auxiliary
 /// structures, such as jump tables, relocations, etc.
 ///
 /// Emission of machine code is complicated by the fact that we don't (in
@@ -54,7 +54,7 @@ protected:
   /// allocated for this code buffer.
   uint8_t *BufferBegin, *BufferEnd;
   /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting
-  /// code.  This is guranteed to be in the range [BufferBegin,BufferEnd].  If
+  /// code.  This is guaranteed to be in the range [BufferBegin,BufferEnd].  If
   /// this pointer is at BufferEnd, it will never move due to code emission, and
   /// all code emission requests will be ignored (this is the buffer overflow
   /// condition).
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 5727321..beb16a2 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -80,7 +80,7 @@ public:
   } Val;
 
   /// The required alignment for this entry. The top bit is set when Val is
-  /// a MachineConstantPoolValue.
+  /// a target specific MachineConstantPoolValue.
   unsigned Alignment;
 
   MachineConstantPoolEntry(const Constant *V, unsigned A)
@@ -93,6 +93,9 @@ public:
     Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
   }
 
+  /// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry
+  /// is indeed a target specific constantpool entry, not a wrapper over a
+  /// Constant.
   bool isMachineConstantPoolEntry() const {
     return (int)Alignment < 0;
   }
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 22a82a9..4ea6aa3 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,7 +15,6 @@
 #define LLVM_CODEGEN_MACHINEFRAMEINFO_H
 
 #include "llvm/ADT/SmallVector.h"
-//#include "llvm/ADT/IndexedMap.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <vector>
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 82c5332..2724689 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -50,13 +50,22 @@ public:
   enum CommentFlag {
     ReloadReuse = 0x1
   };
-  
+
+  enum MIFlag {
+    NoFlags    = 0,
+    FrameSetup = 1 << 0                 // Instruction is used as a part of
+                                        // function frame setup code.
+  };
 private:
   const TargetInstrDesc *TID;           // Instruction descriptor.
-  unsigned short NumImplicitOps;        // Number of implicit operands (which
+  uint16_t NumImplicitOps;              // Number of implicit operands (which
                                         // are determined at construction time).
 
-  unsigned short AsmPrinterFlags;       // Various bits of information used by
+  uint8_t Flags;                        // Various bits of additional
+                                        // information about machine
+                                        // instruction.
+
+  uint8_t AsmPrinterFlags;              // Various bits of information used by
                                         // the AsmPrinter to emit helpful
                                         // comments.  This is *not* semantic
                                         // information.  Do not use this for
@@ -105,13 +114,13 @@ private:
   /// MachineInstr ctor - This constructor create a MachineInstr and add the
   /// implicit operands.  It reserves space for number of operands specified by
   /// TargetInstrDesc.  An explicit DebugLoc is supplied.
-  explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl, 
+  explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl,
                         bool NoImp = false);
 
   /// MachineInstr ctor - Work exactly the same as the ctor above, except that
   /// the MachineInstr is created and added to the end of the specified basic
   /// block.
-  MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, 
+  MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
                const TargetInstrDesc &TID);
 
   ~MachineInstr();
@@ -125,12 +134,12 @@ public:
 
   /// getAsmPrinterFlags - Return the asm printer flags bitvector.
   ///
-  unsigned short getAsmPrinterFlags() const { return AsmPrinterFlags; }
+  uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; }
 
   /// clearAsmPrinterFlags - clear the AsmPrinter bitvector
   ///
   void clearAsmPrinterFlags() { AsmPrinterFlags = 0; }
-  
+
   /// getAsmPrinterFlag - Return whether an AsmPrinter flag is set.
   ///
   bool getAsmPrinterFlag(CommentFlag Flag) const {
@@ -140,9 +149,28 @@ public:
   /// setAsmPrinterFlag - Set a flag for the AsmPrinter.
   ///
   void setAsmPrinterFlag(CommentFlag Flag) {
-    AsmPrinterFlags |= (unsigned short)Flag;
+    AsmPrinterFlags |= (uint8_t)Flag;
+  }
+
+  /// getFlags - Return the MI flags bitvector.
+  uint8_t getFlags() const {
+    return Flags;
+  }
+
+  /// getFlag - Return whether an MI flag is set.
+  bool getFlag(MIFlag Flag) const {
+    return Flags & Flag;
+  }
+
+  /// setFlag - Set a MI flag.
+  void setFlag(MIFlag Flag) {
+    Flags |= (uint8_t)Flag;
+  }
+
+  void setFlags(unsigned flags) {
+    Flags = flags;
   }
-  
+
   /// clearAsmPrinterFlag - clear specific AsmPrinter flags
   ///
   void clearAsmPrinterFlag(CommentFlag Flag) {
@@ -152,7 +180,7 @@ public:
   /// getDebugLoc - Returns the debug location id of this MachineInstr.
   ///
   DebugLoc getDebugLoc() const { return debugLoc; }
-  
+
   /// getDesc - Returns the target instruction descriptor of this
   /// MachineInstr.
   const TargetInstrDesc &getDesc() const { return *TID; }
@@ -213,7 +241,7 @@ public:
   /// removeFromParent - This method unlinks 'this' from the containing basic
   /// block, and returns it, but does not delete it.
   MachineInstr *removeFromParent();
-  
+
   /// eraseFromParent - This method unlinks 'this' from the containing basic
   /// block and deletes it.
   void eraseFromParent();
@@ -225,14 +253,14 @@ public:
            getOpcode() == TargetOpcode::EH_LABEL ||
            getOpcode() == TargetOpcode::GC_LABEL;
   }
-  
+
   bool isPrologLabel() const {
     return getOpcode() == TargetOpcode::PROLOG_LABEL;
   }
   bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; }
   bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; }
   bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
-  
+
   bool isPHI() const { return getOpcode() == TargetOpcode::PHI; }
   bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
   bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
@@ -329,7 +357,7 @@ public:
     int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI);
     return (Idx == -1) ? NULL : &getOperand(Idx);
   }
-  
+
   /// findRegisterDefOperandIdx() - Returns the operand index that is a def of
   /// the specified register or -1 if it is not found. If isDead is true, defs
   /// that are not dead are skipped. If Overlap is true, then it also looks for
@@ -351,7 +379,7 @@ public:
   /// operand list that is used to represent the predicate. It returns -1 if
   /// none is found.
   int findFirstPredOperandIdx() const;
-  
+
   /// isRegTiedToUseOperand - Given the index of a register def operand,
   /// check if the register def is tied to a source operand, due to either
   /// two-address elimination or inline assembly constraints. Returns the
@@ -399,8 +427,8 @@ public:
   void addRegisterDefined(unsigned IncomingReg,
                           const TargetRegisterInfo *RegInfo = 0);
 
-  /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as dead
-  /// except those in the UsedRegs list.
+  /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as
+  /// dead except those in the UsedRegs list.
   void setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
                              const TargetRegisterInfo &TRI);
 
@@ -462,9 +490,9 @@ public:
   /// addOperand - Add the specified operand to the instruction.  If it is an
   /// implicit operand, it is added to the end of the operand list.  If it is
   /// an explicit operand it is added at the end of the explicit operand list
-  /// (before the first implicit operand). 
+  /// (before the first implicit operand).
   void addOperand(const MachineOperand &Op);
-  
+
   /// setDesc - Replace the instruction descriptor (thus opcode) of
   /// the current instruction with a new one.
   ///
@@ -501,12 +529,12 @@ private:
   /// addImplicitDefUseOperands - Add all implicit def and use operands to
   /// this instruction.
   void addImplicitDefUseOperands();
-  
+
   /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
   /// this instruction from their respective use lists.  This requires that the
   /// operands already be on their use lists.
   void RemoveRegOperandsFromUseLists();
-  
+
   /// AddRegOperandsToUseLists - Add all of the register operands in
   /// this instruction from their respective use lists.  This requires that the
   /// operands not be on their use lists yet.
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 1eb9735..967e019 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -48,6 +48,7 @@ public:
   /// Allow automatic conversion to the machine instruction we are working on.
   ///
   operator MachineInstr*() const { return MI; }
+  MachineInstr *operator->() const { return MI; }
   operator MachineBasicBlock::iterator() const { return MI; }
 
   /// addReg - Add a new virtual register operand...
@@ -145,6 +146,16 @@ public:
     return *this;
   }
 
+  const MachineInstrBuilder &setMIFlags(unsigned Flags) const {
+    MI->setFlags(Flags);
+    return *this;
+  }
+
+  const MachineInstrBuilder &setMIFlag(MachineInstr::MIFlag Flag) const {
+    MI->setFlag(Flag);
+    return *this;
+  }
+
   // Add a displacement from an existing MachineOperand with an added offset.
   const MachineInstrBuilder &addDisp(const MachineOperand &Disp,
                                      int64_t off) const {
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index b2224cb..5240729 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -18,7 +18,6 @@
 #include "Math.h"
 
 #include <list>
-#include <vector>
 #include <map>
 
 namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
index 47a287c..e96c4cb 100644
--- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -21,7 +21,6 @@
 #include "../HeuristicSolver.h"
 #include "../HeuristicBase.h"
 
-#include <set>
 #include <limits>
 
 namespace PBQP {
diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h
index e2ab899..6ab57f0 100644
--- a/include/llvm/CodeGen/ProcessImplicitDefs.h
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -18,14 +18,20 @@ namespace llvm {
 
   class MachineInstr;
   class TargetInstrInfo;
+  class TargetRegisterInfo;
+  class MachineRegisterInfo;
+  class LiveVariables;
 
   /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
   /// for each use. Add isUndef marker to implicit_def defs and their uses.
   class ProcessImplicitDefs : public MachineFunctionPass {
-  private:
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+    LiveVariables *LV;
 
     bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
-                                unsigned OpIdx, const TargetInstrInfo *tii_,
+                                unsigned OpIdx,
                                 SmallSet<unsigned, 8> &ImpDefRegs);
 
   public:
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 246831c..26b6773 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -100,7 +100,7 @@ public:
 
   /// getRegsAvailable - Return all available registers in the register class
   /// in Mask.
-  void getRegsAvailable(const TargetRegisterClass *RC, BitVector &Mask);
+  BitVector getRegsAvailable(const TargetRegisterClass *RC);
 
   /// FindUnusedReg - Find a unused register of the specified register class.
   /// Return 0 if none is found.
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index a51e82a..576be82 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -66,6 +66,16 @@ namespace RTLIB {
     UREM_I32,
     UREM_I64,
     UREM_I128,
+    SDIVREM_I8,
+    SDIVREM_I16,
+    SDIVREM_I32,
+    SDIVREM_I64,
+    SDIVREM_I128,
+    UDIVREM_I8,
+    UDIVREM_I16,
+    UDIVREM_I32,
+    UDIVREM_I64,
+    UDIVREM_I128,
     NEG_I32,
     NEG_I64,
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 3864ffd..2eb3db3 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -250,7 +250,9 @@ namespace llvm {
     unsigned NumSuccsLeft;              // # of succs not scheduled.
     unsigned short NumRegDefsLeft;      // # of reg defs with no scheduled use.
     unsigned short Latency;             // Node latency.
+    bool isVRegCycle      : 1;          // May use and def the same vreg.
     bool isCall           : 1;          // Is a function call.
+    bool isCallOp         : 1;          // Is a function call operand.
     bool isTwoAddress     : 1;          // Is a two-address instruction.
     bool isCommutable     : 1;          // Is a commutable instruction.
     bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
@@ -259,6 +261,7 @@ namespace llvm {
     bool isAvailable      : 1;          // True once available.
     bool isScheduled      : 1;          // True once scheduled.
     bool isScheduleHigh   : 1;          // True if preferable to schedule high.
+    bool isScheduleLow    : 1;          // True if preferable to schedule low.
     bool isCloned         : 1;          // True if this node has been cloned.
     Sched::Preference SchedulingPref;   // Scheduling preference.
 
@@ -278,10 +281,10 @@ namespace llvm {
       : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isCall(false), isTwoAddress(false), isCommutable(false),
-        hasPhysRegDefs(false), hasPhysRegClobbers(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
+        isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
-        isScheduleHigh(false), isCloned(false),
+        isScheduleHigh(false), isScheduleLow(false), isCloned(false),
         SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -292,10 +295,10 @@ namespace llvm {
       : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isCall(false), isTwoAddress(false), isCommutable(false),
-        hasPhysRegDefs(false), hasPhysRegClobbers(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
+        isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
-        isScheduleHigh(false), isCloned(false),
+        isScheduleHigh(false), isScheduleLow(false), isCloned(false),
         SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -305,10 +308,10 @@ namespace llvm {
       : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isCall(false), isTwoAddress(false), isCommutable(false),
-        hasPhysRegDefs(false), hasPhysRegClobbers(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
+        isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
-        isScheduleHigh(false), isCloned(false),
+        isScheduleHigh(false), isScheduleLow(false), isCloned(false),
         SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -356,7 +359,7 @@ namespace llvm {
     void removePred(const SDep &D);
 
     /// getDepth - Return the depth of this node, which is the length of the
-    /// maximum path up to any node with has no predecessors.
+    /// maximum path up to any node which has no predecessors.
     unsigned getDepth() const {
       if (!isDepthCurrent)
         const_cast<SUnit *>(this)->ComputeDepth();
@@ -364,7 +367,7 @@ namespace llvm {
     }
 
     /// getHeight - Return the height of this node, which is the length of the
-    /// maximum path down to any node with has no successors.
+    /// maximum path down to any node which has no successors.
     unsigned getHeight() const {
       if (!isHeightCurrent)
         const_cast<SUnit *>(this)->ComputeHeight();
@@ -690,11 +693,11 @@ namespace llvm {
     /// will create a cycle.
     bool WillCreateCycle(SUnit *SU, SUnit *TargetSU);
 
-    /// AddPred - Updates the topological ordering to accomodate an edge
+    /// AddPred - Updates the topological ordering to accommodate an edge
     /// to be added from SUnit X to SUnit Y.
     void AddPred(SUnit *Y, SUnit *X);
 
-    /// RemovePred - Updates the topological ordering to accomodate an
+    /// RemovePred - Updates the topological ordering to accommodate an
     /// an edge to be removed from the specified node N from the predecessors
     /// of the current node M.
     void RemovePred(SUnit *M, SUnit *N);
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 8850006..118df28 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -21,7 +21,6 @@
 
 #include <cassert>
 #include <cstring>
-#include <string>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index c9de95b..92fd0c9 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -438,12 +438,12 @@ public:
   SDValue getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
                            SDValue STy,
                            SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
-  
+
   /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node.  The number of
   /// elements in VT, which must be a vector type, must match the number of
   /// mask elements NumElts.  A integer mask element equal to -1 is treated as
   /// undefined.
-  SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, 
+  SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2,
                            const int *MaskElts);
 
   /// getSExtOrTrunc - Convert Op, which must be of integer type, to the
@@ -671,10 +671,10 @@ public:
 
   /// getMDNode - Return an MDNodeSDNode which holds an MDNode.
   SDValue getMDNode(const MDNode *MD);
-  
+
   /// getShiftAmountOperand - Return the specified value casted to
   /// the target's desired shift amount type.
-  SDValue getShiftAmountOperand(SDValue Op);
+  SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
 
   /// UpdateNodeOperands - *Mutate* the specified node in-place to have the
   /// specified operands.  If the resultant node already exists in the DAG,
@@ -829,7 +829,7 @@ public:
   /// These functions only replace all existing uses. It's possible that as
   /// these replacements are being performed, CSE may cause the From node
   /// to be given new uses. These new uses of From are left in place, and
-  /// not automatically transfered to To.
+  /// not automatically transferred to To.
   ///
   void ReplaceAllUsesWith(SDValue From, SDValue Op,
                           DAGUpdateListener *UpdateListener = 0);
@@ -901,7 +901,7 @@ public:
   SmallVector<SDDbgValue*,2> &GetDbgValues(const SDNode* SD) {
     return DbgInfo->getSDDbgValues(SD);
   }
-  
+
   /// TransferDbgValues - Transfer SDDbgValues.
   void TransferDbgValues(SDValue From, SDValue To);
 
@@ -911,11 +911,11 @@ public:
 
   SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
   SDDbgInfo::DbgIterator DbgEnd()   { return DbgInfo->DbgEnd(); }
-  SDDbgInfo::DbgIterator ByvalParmDbgBegin() { 
-    return DbgInfo->ByvalParmDbgBegin(); 
+  SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
+    return DbgInfo->ByvalParmDbgBegin();
   }
-  SDDbgInfo::DbgIterator ByvalParmDbgEnd()   { 
-    return DbgInfo->ByvalParmDbgEnd(); 
+  SDDbgInfo::DbgIterator ByvalParmDbgEnd()   {
+    return DbgInfo->ByvalParmDbgEnd();
   }
 
   void dump() const;
@@ -972,7 +972,7 @@ public:
   /// semantics as an ADD.  This handles the equivalence:
   ///     X|Cst == X+Cst iff X&Cst = 0.
   bool isBaseWithConstantOffset(SDValue Op) const;
-  
+
   /// isKnownNeverNan - Test whether the given SDValue is known to never be NaN.
   bool isKnownNeverNaN(SDValue Op) const;
 
@@ -997,8 +997,8 @@ public:
   /// vector op and fill the end of the resulting vector with UNDEFS.
   SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
 
-  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
-  /// location that is 'Dist' units away from the location that the 'Base' load 
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+  /// location that is 'Dist' units away from the location that the 'Base' load
   /// is loading from.
   bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
                          unsigned Bytes, int Dist) const;
@@ -1032,7 +1032,7 @@ private:
   std::vector<SDNode*> ValueTypeNodes;
   std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
   StringMap<SDNode*> ExternalSymbols;
-  
+
   std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
 };
 
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 62358e7..ecf3947 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -127,6 +127,7 @@ public:
 
     OPC_EmitInteger,
     OPC_EmitRegister,
+    OPC_EmitRegister2,
     OPC_EmitConvertToTarget,
     OPC_EmitMergeInputChains,
     OPC_EmitMergeInputChains1_0,
@@ -257,7 +258,7 @@ public:
   }
 
   virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {
-    assert(0 && "Tblgen shoudl generate this!");
+    assert(0 && "Tblgen should generate this!");
     return SDValue();
   }
 
@@ -279,7 +280,8 @@ private:
 
   void PrepareEHLandingPad();
   void SelectAllBasicBlocks(const Function &Fn);
-  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
+                             FastISel *FastIS);
   void FinishBasicBlock();
 
   void SelectBasicBlock(BasicBlock::const_iterator Begin,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 6454639..9d265f1 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -838,7 +838,7 @@ public:
 
 
 /// HandleSDNode - This class is used to form a handle around another node that
-/// is persistant and is updated across invocations of replaceAllUsesWith on its
+/// is persistent and is updated across invocations of replaceAllUsesWith on its
 /// operand.  This node should be directly created by end-users and not added to
 /// the AllNodes list.
 class HandleSDNode : public SDNode {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 1da1e91be..33ce675 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -34,77 +34,35 @@ namespace llvm {
   /// SlotIndex & SlotIndexes classes for the public interface to this
   /// information.
   class IndexListEntry {
-    static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U,
-                          TOMBSTONE_KEY_INDEX = ~0U & ~7U;
-
     IndexListEntry *next, *prev;
     MachineInstr *mi;
     unsigned index;
 
-  protected:
-
-    typedef enum { EMPTY_KEY, TOMBSTONE_KEY } ReservedEntryType;
-
-    // This constructor is only to be used by getEmptyKeyEntry
-    // & getTombstoneKeyEntry. It sets index to the given
-    // value and mi to zero.
-    IndexListEntry(ReservedEntryType r) : mi(0) {
-      switch(r) {
-        case EMPTY_KEY: index = EMPTY_KEY_INDEX; break;
-        case TOMBSTONE_KEY: index = TOMBSTONE_KEY_INDEX; break;
-        default: assert(false && "Invalid value for constructor."); 
-      }
-      next = this;
-      prev = this;
-    }
-
   public:
 
-    IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {
-      assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
-             "Attempt to create invalid index. "
-             "Available indexes may have been exhausted?.");
-    }
-
-    bool isValid() const {
-      return (index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX);
-    }
+    IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {}
 
     MachineInstr* getInstr() const { return mi; }
     void setInstr(MachineInstr *mi) {
-      assert(isValid() && "Attempt to modify reserved index.");
       this->mi = mi;
     }
 
     unsigned getIndex() const { return index; }
     void setIndex(unsigned index) {
-      assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX &&
-             "Attempt to set index to invalid value.");
-      assert(isValid() && "Attempt to reset reserved index value.");
       this->index = index;
     }
     
     IndexListEntry* getNext() { return next; }
     const IndexListEntry* getNext() const { return next; }
     void setNext(IndexListEntry *next) {
-      assert(isValid() && "Attempt to modify reserved index.");
       this->next = next;
     }
 
     IndexListEntry* getPrev() { return prev; }
     const IndexListEntry* getPrev() const { return prev; }
     void setPrev(IndexListEntry *prev) {
-      assert(isValid() && "Attempt to modify reserved index.");
       this->prev = prev;
     }
-
-    // This function returns the index list entry that is to be used for empty
-    // SlotIndex keys.
-    static IndexListEntry* getEmptyKeyEntry();
-
-    // This function returns the index list entry that is to be used for
-    // tombstone SlotIndex keys.
-    static IndexListEntry* getTombstoneKeyEntry();
   };
 
   // Specialize PointerLikeTypeTraits for IndexListEntry.
@@ -130,11 +88,10 @@ namespace llvm {
     PointerIntPair<IndexListEntry*, 2, unsigned> lie;
 
     SlotIndex(IndexListEntry *entry, unsigned slot)
-      : lie(entry, slot) {
-      assert(entry != 0 && "Attempt to construct index with 0 pointer.");
-    }
+      : lie(entry, slot) {}
 
     IndexListEntry& entry() const {
+      assert(isValid() && "Attempt to compare reserved index.");
       return *lie.getPointer();
     }
 
@@ -148,22 +105,27 @@ namespace llvm {
     }
 
     static inline unsigned getHashValue(const SlotIndex &v) {
-      IndexListEntry *ptrVal = &v.entry();
-      return (unsigned((intptr_t)ptrVal) >> 4) ^
-             (unsigned((intptr_t)ptrVal) >> 9);
+      void *ptrVal = v.lie.getOpaqueValue();
+      return (unsigned((intptr_t)ptrVal)) ^ (unsigned((intptr_t)ptrVal) >> 9);
     }
 
   public:
+    enum {
+      /// The default distance between instructions as returned by distance().
+      /// This may vary as instructions are inserted and removed.
+      InstrDist = 4*NUM
+    };
+
     static inline SlotIndex getEmptyKey() {
-      return SlotIndex(IndexListEntry::getEmptyKeyEntry(), 0);
+      return SlotIndex(0, 1);
     }
 
     static inline SlotIndex getTombstoneKey() {
-      return SlotIndex(IndexListEntry::getTombstoneKeyEntry(), 0);
+      return SlotIndex(0, 2);
     }
 
     /// Construct an invalid index.
-    SlotIndex() : lie(IndexListEntry::getEmptyKeyEntry(), 0) {}
+    SlotIndex() : lie(0, 0) {}
 
     // Construct a new slot index from the given one, and set the slot.
     SlotIndex(const SlotIndex &li, Slot s)
@@ -175,8 +137,7 @@ namespace llvm {
     /// Returns true if this is a valid index. Invalid indicies do
     /// not point into an index table, and cannot be compared.
     bool isValid() const {
-      IndexListEntry *entry = lie.getPointer();
-      return ((entry!= 0) && (entry->isValid()));
+      return lie.getPointer();
     }
 
     /// Print this index to the given raw_ostream.
@@ -187,11 +148,11 @@ namespace llvm {
 
     /// Compare two SlotIndex objects for equality.
     bool operator==(SlotIndex other) const {
-      return getIndex() == other.getIndex();
+      return lie == other.lie;
     }
     /// Compare two SlotIndex objects for inequality.
     bool operator!=(SlotIndex other) const {
-      return getIndex() != other.getIndex(); 
+      return lie != other.lie;
     }
    
     /// Compare two SlotIndex objects. Return true if the first index
@@ -217,6 +178,11 @@ namespace llvm {
       return getIndex() >= other.getIndex();
     }
 
+    /// isSameInstr - Return true if A and B refer to the same instruction.
+    static bool isSameInstr(SlotIndex A, SlotIndex B) {
+      return A.lie.getPointer() == B.lie.getPointer();
+    }
+
     /// Return the distance from this index to the given one.
     int distance(SlotIndex other) const {
       return other.getIndex() - getIndex();
@@ -376,15 +342,12 @@ namespace llvm {
     typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
     Mi2IndexMap mi2iMap;
 
-    /// MBB2IdxMap - The indexes of the first and last instructions in the
-    /// specified basic block.
-    typedef DenseMap<const MachineBasicBlock*,
-                     std::pair<SlotIndex, SlotIndex> > MBB2IdxMap;
-    MBB2IdxMap mbb2IdxMap;
+    /// MBBRanges - Map MBB number to (start, stop) indexes.
+    SmallVector<std::pair<SlotIndex, SlotIndex>, 8> MBBRanges;
 
     /// Idx2MBBMap - Sorted list of pairs of index of first instruction
     /// and MBB id.
-    std::vector<IdxMBBPair> idx2MBBMap;
+    SmallVector<IdxMBBPair, 8> idx2MBBMap;
 
     // IndexListEntry allocator.
     BumpPtrAllocator ileAllocator;
@@ -466,6 +429,9 @@ namespace llvm {
       insert(getTail(), val);
     }
 
+    /// Renumber locally after inserting newEntry.
+    void renumberIndexes(IndexListEntry *newEntry);
+
   public:
     static char ID;
 
@@ -530,7 +496,7 @@ namespace llvm {
     /// Returns the instruction for the given index, or null if the given
     /// index has no instruction associated with it.
     MachineInstr* getInstructionFromIndex(SlotIndex index) const {
-      return index.entry().getInstr();
+      return index.isValid() ? index.entry().getInstr() : 0;
     }
 
     /// Returns the next non-null index.
@@ -545,12 +511,55 @@ namespace llvm {
       return nextNonNull;
     }
 
+    /// getIndexBefore - Returns the index of the last indexed instruction
+    /// before MI, or the the start index of its basic block.
+    /// MI is not required to have an index.
+    SlotIndex getIndexBefore(const MachineInstr *MI) const {
+      const MachineBasicBlock *MBB = MI->getParent();
+      assert(MBB && "MI must be inserted inna basic block");
+      MachineBasicBlock::const_iterator I = MI, B = MBB->begin();
+      for (;;) {
+        if (I == B)
+          return getMBBStartIdx(MBB);
+        --I;
+        Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I);
+        if (MapItr != mi2iMap.end())
+          return MapItr->second;
+      }
+    }
+
+    /// getIndexAfter - Returns the index of the first indexed instruction
+    /// after MI, or the end index of its basic block.
+    /// MI is not required to have an index.
+    SlotIndex getIndexAfter(const MachineInstr *MI) const {
+      const MachineBasicBlock *MBB = MI->getParent();
+      assert(MBB && "MI must be inserted inna basic block");
+      MachineBasicBlock::const_iterator I = MI, E = MBB->end();
+      for (;;) {
+        ++I;
+        if (I == E)
+          return getMBBEndIdx(MBB);
+        Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I);
+        if (MapItr != mi2iMap.end())
+          return MapItr->second;
+      }
+    }
+
+    /// Return the (start,end) range of the given basic block number.
+    const std::pair<SlotIndex, SlotIndex> &
+    getMBBRange(unsigned Num) const {
+      return MBBRanges[Num];
+    }
+
     /// Return the (start,end) range of the given basic block.
     const std::pair<SlotIndex, SlotIndex> &
-    getMBBRange(const MachineBasicBlock *mbb) const {
-      MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
-      assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
-      return itr->second;
+    getMBBRange(const MachineBasicBlock *MBB) const {
+      return getMBBRange(MBB->getNumber());
+    }
+
+    /// Returns the first index in the given basic block number.
+    SlotIndex getMBBStartIdx(unsigned Num) const {
+      return getMBBRange(Num).first;
     }
 
     /// Returns the first index in the given basic block.
@@ -558,6 +567,11 @@ namespace llvm {
       return getMBBRange(mbb).first;
     }
 
+    /// Returns the last index in the given basic block number.
+    SlotIndex getMBBEndIdx(unsigned Num) const {
+      return getMBBRange(Num).second;
+    }
+
     /// Returns the last index in the given basic block.
     SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
       return getMBBRange(mbb).second;
@@ -565,10 +579,12 @@ namespace llvm {
 
     /// Returns the basic block which the given index falls in.
     MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
-      std::vector<IdxMBBPair>::const_iterator I =
+      if (MachineInstr *MI = getInstructionFromIndex(index))
+        return MI->getParent();
+      SmallVectorImpl<IdxMBBPair>::const_iterator I =
         std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
       // Take the pair containing the index
-      std::vector<IdxMBBPair>::const_iterator J =
+      SmallVectorImpl<IdxMBBPair>::const_iterator J =
         ((I != idx2MBBMap.end() && I->first > index) ||
          (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I;
 
@@ -580,7 +596,7 @@ namespace llvm {
 
     bool findLiveInMBBs(SlotIndex start, SlotIndex end,
                         SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
-      std::vector<IdxMBBPair>::const_iterator itr =
+      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
         std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
       bool resVal = false;
 
@@ -600,7 +616,7 @@ namespace llvm {
 
       assert(start < end && "Backwards ranges not allowed.");
 
-      std::vector<IdxMBBPair>::const_iterator itr =
+      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
         std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
 
       if (itr == idx2MBBMap.end()) {
@@ -622,95 +638,47 @@ namespace llvm {
 
     /// Insert the given machine instruction into the mapping. Returns the
     /// assigned index.
-    SlotIndex insertMachineInstrInMaps(MachineInstr *mi,
-                                        bool *deferredRenumber = 0) {
+    /// If Late is set and there are null indexes between mi's neighboring
+    /// instructions, create the new index after the null indexes instead of
+    /// before them.
+    SlotIndex insertMachineInstrInMaps(MachineInstr *mi, bool Late = false) {
       assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed.");
       // Numbering DBG_VALUE instructions could cause code generation to be
       // affected by debug information.
       assert(!mi->isDebugValue() && "Cannot number DBG_VALUE instructions.");
 
-      MachineBasicBlock *mbb = mi->getParent();
-
-      assert(mbb != 0 && "Instr must be added to function.");
+      assert(mi->getParent() != 0 && "Instr must be added to function.");
 
-      MBB2IdxMap::iterator mbbRangeItr = mbb2IdxMap.find(mbb);
-
-      assert(mbbRangeItr != mbb2IdxMap.end() &&
-             "Instruction's parent MBB has not been added to SlotIndexes.");
-
-      MachineBasicBlock::iterator miItr(mi);
-      bool needRenumber = false;
-      IndexListEntry *newEntry;
-      // Get previous index, considering that not all instructions are indexed.
-      IndexListEntry *prevEntry;
-      for (;;) {
-        // If mi is at the mbb beginning, get the prev index from the mbb.
-        if (miItr == mbb->begin()) {
-          prevEntry = &mbbRangeItr->second.first.entry();
-          break;
-        }
-        // Otherwise rewind until we find a mapped instruction.
-        Mi2IndexMap::const_iterator itr = mi2iMap.find(--miItr);
-        if (itr != mi2iMap.end()) {
-          prevEntry = &itr->second.entry();
-          break;
-        }
+      // Get the entries where mi should be inserted.
+      IndexListEntry *prevEntry, *nextEntry;
+      if (Late) {
+        // Insert mi's index immediately before the following instruction.
+        nextEntry = &getIndexAfter(mi).entry();
+        prevEntry = nextEntry->getPrev();
+      } else {
+        // Insert mi's index immediately after the preceeding instruction.
+        prevEntry = &getIndexBefore(mi).entry();
+        nextEntry = prevEntry->getNext();
       }
 
-      // Get next entry from previous entry.
-      IndexListEntry *nextEntry = prevEntry->getNext();
-
       // Get a number for the new instr, or 0 if there's no room currently.
       // In the latter case we'll force a renumber later.
-      unsigned dist = nextEntry->getIndex() - prevEntry->getIndex();
-      unsigned newNumber = dist > SlotIndex::NUM ?
-        prevEntry->getIndex() + ((dist >> 1) & ~3U) : 0;
-
-      if (newNumber == 0) {
-        needRenumber = true;
-      }
+      unsigned dist = ((nextEntry->getIndex() - prevEntry->getIndex())/2) & ~3u;
+      unsigned newNumber = prevEntry->getIndex() + dist;
 
       // Insert a new list entry for mi.
-      newEntry = createEntry(mi, newNumber);
+      IndexListEntry *newEntry = createEntry(mi, newNumber);
       insert(nextEntry, newEntry);
-  
-      SlotIndex newIndex(newEntry, SlotIndex::LOAD);
-      mi2iMap.insert(std::make_pair(mi, newIndex));
-
-      if (miItr == mbb->end()) {
-        // If this is the last instr in the MBB then we need to fix up the bb
-        // range:
-        mbbRangeItr->second.second = SlotIndex(newEntry, SlotIndex::STORE);
-      }
 
-      // Renumber if we need to.
-      if (needRenumber) {
-        if (deferredRenumber == 0)
-          renumberIndexes();
-        else
-          *deferredRenumber = true;
-      }
+      // Renumber locally if we need to.
+      if (dist == 0)
+        renumberIndexes(newEntry);
 
+      SlotIndex newIndex(newEntry, SlotIndex::LOAD);
+      mi2iMap.insert(std::make_pair(mi, newIndex));
       return newIndex;
     }
 
-    /// Add all instructions in the vector to the index list. This method will
-    /// defer renumbering until all instrs have been added, and should be 
-    /// preferred when adding multiple instrs.
-    void insertMachineInstrsInMaps(SmallVectorImpl<MachineInstr*> &mis) {
-      bool renumber = false;
-
-      for (SmallVectorImpl<MachineInstr*>::iterator
-           miItr = mis.begin(), miEnd = mis.end();
-           miItr != miEnd; ++miItr) {
-        insertMachineInstrInMaps(*miItr, &renumber);
-      }
-
-      if (renumber)
-        renumberIndexes();
-    }
-
-
     /// Remove the given machine instruction from the mapping.
     void removeMachineInstrFromMaps(MachineInstr *mi) {
       // remove index -> MachineInstr and
@@ -760,21 +728,14 @@ namespace llvm {
       SlotIndex startIdx(startEntry, SlotIndex::LOAD);
       SlotIndex endIdx(nextEntry, SlotIndex::LOAD);
 
-      mbb2IdxMap.insert(
-        std::make_pair(mbb, std::make_pair(startIdx, endIdx)));
+      assert(unsigned(mbb->getNumber()) == MBBRanges.size() &&
+             "Blocks must be added in order");
+      MBBRanges.push_back(std::make_pair(startIdx, endIdx));
 
       idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
 
-      if (MachineFunction::iterator(mbb) != mbb->getParent()->begin()) {
-        // Have to update the end index of the previous block.
-        MachineBasicBlock *priorMBB =
-          llvm::prior(MachineFunction::iterator(mbb));
-        mbb2IdxMap[priorMBB].second = startIdx;
-      }
-
       renumberIndexes();
       std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
-
     }
 
   };
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index fba3e48..829f580 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -59,6 +59,10 @@ public:
 
   virtual const MCSection *getEHFrameSection() const;
 
+  virtual void emitPersonalityValue(MCStreamer &Streamer,
+                                    const TargetMachine &TM,
+                                    const MCSymbol *Sym) const;
+
   const MCSection *getDataRelSection() const { return DataRelSection; }
 
   /// getSectionForConstant - Given a constant with the SectionKind, return a
@@ -81,6 +85,11 @@ public:
   getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                  MachineModuleInfo *MMI, unsigned Encoding,
                                  MCStreamer &Streamer) const;
+
+  // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
+  virtual MCSymbol *
+  getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                          MachineModuleInfo *MMI) const;
 };
 
 
@@ -94,7 +103,7 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
   ///
   const MCSection *TLSBSSSection;         // Defaults to ".tbss".
   
-  /// TLSTLVSection - Section for thread local structure infomation.
+  /// TLSTLVSection - Section for thread local structure information.
   /// Contains the source code name of the variable, visibility and a pointer
   /// to the initial value (.tdata or .tbss).
   const MCSection *TLSTLVSection;         // Defaults to ".tlv".
@@ -172,9 +181,14 @@ public:
                                  MachineModuleInfo *MMI, unsigned Encoding,
                                  MCStreamer &Streamer) const;
 
+  // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
+  virtual MCSymbol *
+  getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                          MachineModuleInfo *MMI) const;
+
   virtual unsigned getPersonalityEncoding() const;
   virtual unsigned getLSDAEncoding() const;
-  virtual unsigned getFDEEncoding() const;
+  virtual unsigned getFDEEncoding(bool CFI) const;
   virtual unsigned getTTypeEncoding() const;
 };
 
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index e1eea32..951aff6 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -40,7 +40,7 @@ namespace llvmc {
   };
 
   /// Edge - Represents an edge of the compilation graph.
-  class Edge : public llvm::RefCountedBaseVPTR<Edge> {
+  class Edge : public llvm::RefCountedBaseVPTR {
   public:
     Edge(const std::string& T) : ToolName_(T) {}
     virtual ~Edge() {}
diff --git a/include/llvm/CompilerDriver/Tool.h b/include/llvm/CompilerDriver/Tool.h
index d0926ba..18a2b76 100644
--- a/include/llvm/CompilerDriver/Tool.h
+++ b/include/llvm/CompilerDriver/Tool.h
@@ -33,7 +33,7 @@ namespace llvmc {
   typedef llvm::StringSet<> InputLanguagesSet;
 
   /// Tool - Represents a single tool.
-  class Tool : public llvm::RefCountedBaseVPTR<Tool> {
+  class Tool : public llvm::RefCountedBaseVPTR {
   public:
 
     virtual ~Tool() {}
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index bf69375..755daa6 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -196,6 +196,9 @@
 /* Define to 1 if you have the `udis86' library (-ludis86). */
 #undef HAVE_LIBUDIS86
 
+/* Type of 1st arg on ELM Callback */
+#cmakedefine WIN32_ELMCB_PCSTR ${WIN32_ELMCB_PCSTR}
+
 /* Define to 1 if you have the <limits.h> header file. */
 #cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
 
@@ -453,7 +456,7 @@
 #cmakedefine HAVE_WRITEV ${HAVE_WRITEV}
 
 /* Define if the xdot.py program is available */
-#undef HAVE_XDOT_PY
+#cmakedefine HAVE_XDOT_PY ${HAVE_XDOT_PY}
 
 /* Have host's _alloca */
 #cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA}
@@ -585,7 +588,7 @@
 #cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
 
 /* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
-#undef LLVM_PATH_XDOT_PY
+#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT_PY}"
 
 /* Installation prefix directory */
 #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 14c44b4..10a8935 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -567,6 +567,9 @@
 /* LLVM architecture name for the native architecture, if available */
 #undef LLVM_NATIVE_ARCH
 
+/* LLVM name for the native AsmParser init function, if available */
+#undef LLVM_NATIVE_ASMPARSER
+
 /* LLVM name for the native AsmPrinter init function, if available */
 #undef LLVM_NATIVE_ASMPRINTER
 
@@ -672,6 +675,9 @@
 /* Define if use udis86 library */
 #undef USE_UDIS86
 
+/* Type of 1st arg on ELM Callback */
+#undef WIN32_ELMCB_PCSTR
+
 /* Define to empty if `const' does not conform to ANSI C. */
 #undef const
 
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index a679b95..9a9cb3b 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -61,6 +61,9 @@
 /* LLVM name for the native AsmPrinter init function, if available */
 #cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
 
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser
+
 /* Define if this is Unixish platform */
 #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
 
@@ -91,6 +94,9 @@
 /* Define to path to twopi program if found or 'echo twopi' otherwise */
 #cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
 
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT.PY}"
+
 /* Installation prefix directory */
 #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
 
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index e7a04ee..4766a7a 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -61,6 +61,9 @@
 /* LLVM name for the native AsmPrinter init function, if available */
 #undef LLVM_NATIVE_ASMPRINTER
 
+/* LLVM name for the native AsmPrinter init function, if available */
+#undef LLVM_NATIVE_ASMPARSER
+
 /* Define if this is Unixish platform */
 #undef LLVM_ON_UNIX
 
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index 38045fc..5f32ce0 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -47,10 +47,6 @@ protected:
     : User(ty, vty, Ops, NumOps) {}
 
   void destroyConstantImpl();
-  
-  void setOperand(unsigned i, Value *V) {
-    User::setOperand(i, V);
-  }
 public:
   /// isNullValue - Return true if this is the value that would be returned by
   /// getNullValue.
@@ -90,15 +86,6 @@ public:
   /// FIXME: This really should not be in VMCore.
   PossibleRelocationsTy getRelocationInfo() const;
   
-  // Specialize get/setOperand for Users as their operands are always
-  // constants or BasicBlocks as well.
-  User *getOperand(unsigned i) {
-    return static_cast<User*>(User::getOperand(i));
-  }
-  const User *getOperand(unsigned i) const {
-    return static_cast<const User*>(User::getOperand(i));
-  }
-  
   /// getVectorElements - This method, which is only valid on constant of vector
   /// type, returns the elements of the vector in the specified smallvector.
   /// This handles breaking down a vector undef into undef elements, etc.  For
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index c4768f8..eabc3a5 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -57,6 +57,8 @@ protected:
 public:
   static ConstantInt *getTrue(LLVMContext &Context);
   static ConstantInt *getFalse(LLVMContext &Context);
+  static Constant *getTrue(const Type *Ty);
+  static Constant *getFalse(const Type *Ty);
   
   /// If Ty is a vector type, return a Constant with a splat of the given
   /// value. Otherwise return a ConstantInt for the given value.
@@ -425,6 +427,8 @@ public:
                        const std::vector<Constant*> &V, bool Packed);
   static Constant *get(LLVMContext &Context,
                        Constant *const *Vals, unsigned NumVals, bool Packed);
+  static Constant *get(LLVMContext &Context, bool Packed,
+                       Constant * Val, ...) END_WITH_NULL;
 
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -599,6 +603,7 @@ struct OperandTraits<BlockAddress> :
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(BlockAddress, Value)
   
+
 //===----------------------------------------------------------------------===//
 /// ConstantExpr - a constant value that is initialized with an expression using
 /// other constant values.
@@ -836,7 +841,7 @@ public:
   static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS);
   static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
 
-  /// Getelementptr form.  std::vector<Value*> is only accepted for convenience:
+  /// Getelementptr form.  Value* is only accepted for convenience;
   /// all elements must be Constant's.
   ///
   static Constant *getGetElementPtr(Constant *C,
@@ -880,7 +885,7 @@ public:
 
   /// getIndices - Assert that this is an insertvalue or exactvalue
   /// expression and return the list of indices.
-  const SmallVector<unsigned, 4> &getIndices() const;
+  ArrayRef<unsigned> getIndices() const;
 
   /// getOpcodeName - Return a string representation for an opcode.
   const char *getOpcodeName() const;
@@ -892,10 +897,7 @@ public:
   /// getWithOperands - This returns the current constant expression with the
   /// operands replaced with the specified values.  The specified operands must
   /// match count and type with the existing ones.
-  Constant *getWithOperands(const std::vector<Constant*> &Ops) const {
-    return getWithOperands(&Ops[0], (unsigned)Ops.size());
-  }
-  Constant *getWithOperands(Constant *const *Ops, unsigned NumOps) const;
+  Constant *getWithOperands(ArrayRef<Constant*> Ops) const;
   
   virtual void destroyConstant();
   virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
diff --git a/include/llvm/DebugInfoProbe.h b/include/llvm/DebugInfoProbe.h
new file mode 100644
index 0000000..78d00df
--- /dev/null
+++ b/include/llvm/DebugInfoProbe.h
@@ -0,0 +1,67 @@
+//===-- DebugInfoProbe.h - DebugInfo Probe ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a probe, DebugInfoProbe, that can be used by pass
+// manager to analyze how optimizer is treating debugging information.
+// 
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H
+#define LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+  class Function;
+  class Pass;
+  class DebugInfoProbeImpl;
+
+  /// DebugInfoProbe - This class provides a interface to monitor
+  /// how an optimization pass is preserving debugging information.
+  class DebugInfoProbe {
+    public:
+    DebugInfoProbe();
+    ~DebugInfoProbe();
+
+    /// initialize - Collect information before running an optimization pass.
+    void initialize(StringRef PName, Function &F);
+
+    /// finalize - Collect information after running an optimization pass. This
+    /// must be used after initialization.
+    void finalize(Function &F);
+
+    /// report - Report findings. This should be invoked after finalize.
+    void report();
+
+    private:
+    DebugInfoProbeImpl *pImpl;
+  };
+
+  /// DebugInfoProbeInfo - This class provides an interface that a pass manager
+  /// can use to manage debug info probes.
+  class DebugInfoProbeInfo {
+    StringMap<DebugInfoProbe *> Probes;
+  public:
+    DebugInfoProbeInfo() {}
+
+    /// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
+    /// them.
+    ~DebugInfoProbeInfo();
+
+    /// initialize - Collect information before running an optimization pass.
+    void initialize(Pass *P, Function &F);
+
+    /// finalize - Collect information after running an optimization pass. This
+    /// must be used after initialization.
+    void finalize(Pass *P, Function &F);
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 56d1e3e..f1cb330 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -19,6 +19,7 @@
 #define LLVM_DERIVED_TYPES_H
 
 #include "llvm/Type.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
@@ -147,7 +148,7 @@ class FunctionType : public DerivedType {
 
   FunctionType(const FunctionType &);                   // Do not implement
   const FunctionType &operator=(const FunctionType &);  // Do not implement
-  FunctionType(const Type *Result, const std::vector<const Type*> &Params,
+  FunctionType(const Type *Result, ArrayRef<const Type*> Params,
                bool IsVarArgs);
 
 public:
@@ -156,7 +157,7 @@ public:
   ///
   static FunctionType *get(
     const Type *Result, ///< The result type
-    const std::vector<const Type*> &Params, ///< The types of the parameters
+    ArrayRef<const Type*> Params, ///< The types of the parameters
     bool isVarArg  ///< Whether this is a variable argument length function
   );
 
@@ -166,7 +167,7 @@ public:
     const Type *Result, ///< The result type
     bool isVarArg  ///< Whether this is a variable argument length function
   ) {
-    return get(Result, std::vector<const Type *>(), isVarArg);
+    return get(Result, ArrayRef<const Type *>(), isVarArg);
   }
 
   /// isValidReturnType - Return true if the specified type is valid as a return
@@ -237,20 +238,19 @@ class StructType : public CompositeType {
   friend class TypeMap<StructValType, StructType>;
   StructType(const StructType &);                   // Do not implement
   const StructType &operator=(const StructType &);  // Do not implement
-  StructType(LLVMContext &C,
-             const std::vector<const Type*> &Types, bool isPacked);
+  StructType(LLVMContext &C, ArrayRef<const Type*> Types, bool isPacked);
 public:
   /// StructType::get - This static method is the primary way to create a
   /// StructType.
   ///
   static StructType *get(LLVMContext &Context, 
-                         const std::vector<const Type*> &Params,
+                         ArrayRef<const Type*> Params,
                          bool isPacked=false);
 
   /// StructType::get - Create an empty structure type.
   ///
   static StructType *get(LLVMContext &Context, bool isPacked=false) {
-    return get(Context, std::vector<const Type*>(), isPacked);
+    return get(Context, llvm::ArrayRef<const Type*>(), isPacked);
   }
 
   /// StructType::get - This static method is a convenience method for
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 71698fa..a01ad3a 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Target/TargetMachine.h"
@@ -117,11 +118,11 @@ protected:
   /// The list of Modules that we are JIT'ing from.  We use a SmallVector to
   /// optimize for the case where there is only one module.
   SmallVector<Module*, 1> Modules;
-  
+
   void setTargetData(const TargetData *td) {
     TD = td;
   }
-  
+
   /// getMemoryforGV - Allocate memory for a global variable.
   virtual char *getMemoryForGV(const GlobalVariable *GV);
 
@@ -155,13 +156,15 @@ protected:
   /// pointer is invoked to create it.  If this returns null, the JIT will
   /// abort.
   void *(*LazyFunctionCreator)(const std::string &);
-  
+
   /// ExceptionTableRegister - If Exception Handling is set, the JIT will
   /// register dwarf tables with this function.
   typedef void (*EERegisterFn)(void*);
   EERegisterFn ExceptionTableRegister;
   EERegisterFn ExceptionTableDeregister;
-  std::vector<void*> AllExceptionTables;
+  /// This maps functions to their exception tables frames.
+  DenseMap<const Function*, void*> AllExceptionTables;
+
 
 public:
   /// lock - This lock protects the ExecutionEngine, JIT, JITResolver and
@@ -182,7 +185,7 @@ public:
   /// \param GVsWithCode - Allocating globals with code breaks
   /// freeMachineCodeForFunction and is probably unsafe and bad for performance.
   /// However, we have clients who depend on this behavior, so we must support
-  /// it.  Eventually, when we're willing to break some backwards compatability,
+  /// it.  Eventually, when we're willing to break some backwards compatibility,
   /// this flag should be flipped to false, so that by default
   /// freeMachineCodeForFunction works.
   static ExecutionEngine *create(Module *M,
@@ -213,7 +216,7 @@ public:
   virtual void addModule(Module *M) {
     Modules.push_back(M);
   }
-  
+
   //===--------------------------------------------------------------------===//
 
   const TargetData *getTargetData() const { return TD; }
@@ -226,7 +229,7 @@ public:
   /// defines FnName.  This is very slow operation and shouldn't be used for
   /// general code.
   Function *FindFunctionNamed(const char *FnName);
-  
+
   /// runFunction - Execute the specified function with the specified arguments,
   /// and return the result.
   virtual GenericValue runFunction(Function *F,
@@ -243,8 +246,8 @@ public:
   ///
   /// \param isDtors - Run the destructors instead of constructors.
   void runStaticConstructorsDestructors(Module *module, bool isDtors);
-  
-  
+
+
   /// runFunctionAsMain - This is a helper function which wraps runFunction to
   /// handle the common task of starting up main with the specified argc, argv,
   /// and envp parameters.
@@ -259,21 +262,21 @@ public:
   /// existing data in memory.  Mappings are automatically removed when their
   /// GlobalValue is destroyed.
   void addGlobalMapping(const GlobalValue *GV, void *Addr);
-  
+
   /// clearAllGlobalMappings - Clear all global mappings and start over again,
   /// for use in dynamic compilation scenarios to move globals.
   void clearAllGlobalMappings();
-  
+
   /// clearGlobalMappingsFromModule - Clear all global mappings that came from a
   /// particular module, because it has been removed from the JIT.
   void clearGlobalMappingsFromModule(Module *M);
-  
+
   /// updateGlobalMapping - Replace an existing mapping for GV with a new
   /// address.  This updates both maps as required.  If "Addr" is null, the
   /// entry for the global is removed from the mappings.  This returns the old
   /// value of the pointer, or null if it was not in the map.
   void *updateGlobalMapping(const GlobalValue *GV, void *Addr);
-  
+
   /// getPointerToGlobalIfAvailable - This returns the address of the specified
   /// global value if it is has already been codegen'd, otherwise it returns
   /// null.
@@ -294,7 +297,7 @@ public:
   /// different ways.  Return the representation for a blockaddress of the
   /// specified block.
   virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0;
-  
+
   /// getPointerToFunctionOrStub - If the specified function has been
   /// code-gen'd, return a pointer to the function.  If not, compile it, or use
   /// a stub to implement lazy compilation if available.  See
@@ -398,7 +401,7 @@ public:
   void InstallLazyFunctionCreator(void* (*P)(const std::string &)) {
     LazyFunctionCreator = P;
   }
-  
+
   /// InstallExceptionTableRegister - The JIT will use the given function
   /// to register the exception tables it generates.
   void InstallExceptionTableRegister(EERegisterFn F) {
@@ -407,13 +410,26 @@ public:
   void InstallExceptionTableDeregister(EERegisterFn F) {
     ExceptionTableDeregister = F;
   }
-  
+
   /// RegisterTable - Registers the given pointer as an exception table.  It
   /// uses the ExceptionTableRegister function.
-  void RegisterTable(void* res) {
+  void RegisterTable(const Function *fn, void* res) {
     if (ExceptionTableRegister) {
       ExceptionTableRegister(res);
-      AllExceptionTables.push_back(res);
+      AllExceptionTables[fn] = res;
+    }
+  }
+
+  /// DeregisterTable - Deregisters the exception frame previously registered
+  /// for the given function.
+  void DeregisterTable(const Function *Fn) {
+    if (ExceptionTableDeregister) {
+      DenseMap<const Function*, void*>::iterator frame =
+        AllExceptionTables.find(Fn);
+      if(frame != AllExceptionTables.end()) {
+        ExceptionTableDeregister(frame->second);
+        AllExceptionTables.erase(frame);
+      }
     }
   }
 
@@ -429,7 +445,7 @@ protected:
   void EmitGlobalVariable(const GlobalVariable *GV);
 
   GenericValue getConstantValue(const Constant *C);
-  void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, 
+  void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr,
                            const Type *Ty);
 };
 
@@ -540,8 +556,9 @@ public:
 
   /// setUseMCJIT - Set whether the MC-JIT implementation should be used
   /// (experimental).
-  void setUseMCJIT(bool Value) {
+  EngineBuilder &setUseMCJIT(bool Value) {
     UseMCJIT = Value;
+    return *this;
   }
 
   /// setMAttrs - Set cpu-specific attributes.
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 3841418..a63f0da 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -29,11 +29,11 @@ protected:
 public:
   JITMemoryManager() : HasGOT(false) {}
   virtual ~JITMemoryManager();
-  
+
   /// CreateDefaultMemManager - This is used to create the default
   /// JIT Memory Manager if the client does not provide one to the JIT.
   static JITMemoryManager *CreateDefaultMemManager();
-  
+
   /// setMemoryWritable - When code generation is in progress,
   /// the code pages may need permissions changed.
   virtual void setMemoryWritable() = 0;
@@ -55,16 +55,16 @@ public:
   /// method is invoked to allocate it.  This method is required to set HasGOT
   /// to true.
   virtual void AllocateGOT() = 0;
-  
+
   /// isManagingGOT - Return true if the AllocateGOT method is called.
   bool isManagingGOT() const {
     return HasGOT;
   }
-  
+
   /// getGOTBase - If this is managing a Global Offset Table, this method should
   /// return a pointer to its base.
   virtual uint8_t *getGOTBase() const = 0;
-  
+
   //===--------------------------------------------------------------------===//
   // Main Allocation Functions
   //===--------------------------------------------------------------------===//
@@ -91,11 +91,11 @@ public:
   /// startFunctionBody.
   virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
                                 unsigned Alignment) = 0;
-  
+
   /// endFunctionBody - This method is called when the JIT is done codegen'ing
   /// the specified function.  At this point we know the size of the JIT
   /// compiled function.  This passes in FunctionStart (which was returned by
-  /// the startFunctionBody method) and FunctionEnd which is a pointer to the 
+  /// the startFunctionBody method) and FunctionEnd which is a pointer to the
   /// actual end of the function.  This method should mark the space allocated
   /// and remember where it is in case the client wants to deallocate it.
   virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
@@ -113,12 +113,12 @@ public:
   /// been deallocated yet.  This is never called when the JIT is currently
   /// emitting a function.
   virtual void deallocateFunctionBody(void *Body) = 0;
-  
+
   /// startExceptionTable - When we finished JITing the function, if exception
   /// handling is set, we emit the exception table.
   virtual uint8_t* startExceptionTable(const Function* F,
                                        uintptr_t &ActualSize) = 0;
-  
+
   /// endExceptionTable - This method is called when the JIT is done emitting
   /// the exception table.
   virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
new file mode 100644
index 0000000..3dc65e3
--- /dev/null
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -0,0 +1,75 @@
+//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the runtime dynamic linker facilities of the MC-JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_H
+#define LLVM_RUNTIME_DYLD_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Memory.h"
+
+namespace llvm {
+
+class RuntimeDyldImpl;
+class MemoryBuffer;
+
+// RuntimeDyld clients often want to handle the memory management of
+// what gets placed where. For JIT clients, this is an abstraction layer
+// over the JITMemoryManager, which references objects by their source
+// representations in LLVM IR.
+// FIXME: As the RuntimeDyld fills out, additional routines will be needed
+//        for the varying types of objects to be allocated.
+class RTDyldMemoryManager {
+  RTDyldMemoryManager(const RTDyldMemoryManager&);  // DO NOT IMPLEMENT
+  void operator=(const RTDyldMemoryManager&);       // DO NOT IMPLEMENT
+public:
+  RTDyldMemoryManager() {}
+  virtual ~RTDyldMemoryManager();
+
+  // Allocate ActualSize bytes, or more, for the named function. Return
+  // a pointer to the allocated memory and update Size to reflect how much
+  // memory was acutally allocated.
+  virtual uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) = 0;
+
+  // Mark the end of the function, including how much of the allocated
+  // memory was actually used.
+  virtual void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+                               uint8_t *FunctionEnd) = 0;
+};
+
+class RuntimeDyld {
+  RuntimeDyld(const RuntimeDyld &);     // DO NOT IMPLEMENT
+  void operator=(const RuntimeDyld &);  // DO NOT IMPLEMENT
+
+  // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
+  // interface.
+  RuntimeDyldImpl *Dyld;
+public:
+  RuntimeDyld(RTDyldMemoryManager*);
+  ~RuntimeDyld();
+
+  bool loadObject(MemoryBuffer *InputBuffer);
+  // Get the address of our local copy of the symbol. This may or may not
+  // be the address used for relocation (clients can copy the data around
+  // and resolve relocatons based on where they put it).
+  void *getSymbolAddress(StringRef Name);
+  // Resolve the relocations for all symbols we currently know about.
+  void resolveRelocations();
+  // Change the address associated with a symbol when resolving relocations.
+  // Any relocations already associated with the symbol will be re-resolved.
+  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+  StringRef getErrorString();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 1769c66..442e0c0 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -12,7 +12,7 @@
 //
 // Global variables are constant pointers that refer to hunks of space that are
 // allocated by either the VM, or by the linker in a static compiler.  A global
-// variable may have an intial value, which is copied into the executables .data
+// variable may have an initial value, which is copied into the executables .data
 // area.  Global Constants are required to have initializers.
 //
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 02dbfbd..cca0194 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -94,12 +94,12 @@ void initializeDominatorTreePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEdgeProfilerPass(PassRegistry&);
 void initializePathProfilerPass(PassRegistry&);
+void initializeGCOVProfilerPass(PassRegistry&);
 void initializeEarlyCSEPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeFindUsedTypesPass(PassRegistry&);
 void initializeFunctionAttrsPass(PassRegistry&);
 void initializeGCModuleInfoPass(PassRegistry&);
-void initializeGEPSplitterPass(PassRegistry&);
 void initializeGVNPass(PassRegistry&);
 void initializeGlobalDCEPass(PassRegistry&);
 void initializeGlobalOptPass(PassRegistry&);
@@ -123,7 +123,6 @@ void initializeLintPass(PassRegistry&);
 void initializeLiveDebugVariablesPass(PassRegistry&);
 void initializeLiveIntervalsPass(PassRegistry&);
 void initializeLiveStacksPass(PassRegistry&);
-void initializeLiveValuesPass(PassRegistry&);
 void initializeLiveVariablesPass(PassRegistry&);
 void initializeLoaderPassPass(PassRegistry&);
 void initializePathProfileLoaderPassPass(PassRegistry&);
@@ -170,7 +169,6 @@ void initializePostDomOnlyPrinterPass(PassRegistry&);
 void initializePostDomOnlyViewerPass(PassRegistry&);
 void initializePostDomPrinterPass(PassRegistry&);
 void initializePostDomViewerPass(PassRegistry&);
-void initializePostDominanceFrontierPass(PassRegistry&);
 void initializePostDominatorTreePass(PassRegistry&);
 void initializePreAllocSplittingPass(PassRegistry&);
 void initializePreVerifierPass(PassRegistry&);
@@ -196,14 +194,12 @@ void initializeRegionViewerPass(PassRegistry&);
 void initializeRegisterCoalescerAnalysisGroup(PassRegistry&);
 void initializeRenderMachineFunctionPass(PassRegistry&);
 void initializeSCCPPass(PassRegistry&);
-void initializeSRETPromotionPass(PassRegistry&);
 void initializeSROA_DTPass(PassRegistry&);
 void initializeSROA_SSAUpPass(PassRegistry&);
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
 void initializeScalarEvolutionPass(PassRegistry&);
 void initializeSimpleInlinerPass(PassRegistry&);
 void initializeSimpleRegisterCoalescingPass(PassRegistry&);
-void initializeSimplifyHalfPowrLibCallsPass(PassRegistry&);
 void initializeSimplifyLibCallsPass(PassRegistry&);
 void initializeSingleLoopExtractorPass(PassRegistry&);
 void initializeSinkingPass(PassRegistry&);
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index a166956..cc9ec3a 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -18,7 +18,6 @@
 
 #include "llvm/Instruction.h"
 #include "llvm/OperandTraits.h"
-#include "llvm/Operator.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/Twine.h"
 
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 17ff763..54dfe39 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -584,7 +584,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
 /// @brief Represent an integer comparison operator.
 class ICmpInst: public CmpInst {
 protected:
-  /// @brief Clone an indentical ICmpInst
+  /// @brief Clone an identical ICmpInst
   virtual ICmpInst *clone_impl() const;
 public:
   /// @brief Constructor with insert-before-instruction semantics.
@@ -735,7 +735,7 @@ public:
 /// @brief Represents a floating point comparison operator.
 class FCmpInst: public CmpInst {
 protected:
-  /// @brief Clone an indentical FCmpInst
+  /// @brief Clone an identical FCmpInst
   virtual FCmpInst *clone_impl() const;
 public:
   /// @brief Constructor with insert-before-instruction semantics.
@@ -1811,39 +1811,37 @@ class PHINode : public Instruction {
   void *operator new(size_t s) {
     return User::operator new(s, 0);
   }
-  explicit PHINode(const Type *Ty, const Twine &NameStr = "",
-                   Instruction *InsertBefore = 0)
+  explicit PHINode(const Type *Ty, unsigned NumReservedValues,
+                   const Twine &NameStr = "", Instruction *InsertBefore = 0)
     : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
-      ReservedSpace(0) {
+      ReservedSpace(NumReservedValues * 2) {
     setName(NameStr);
+    OperandList = allocHungoffUses(ReservedSpace);
   }
 
-  PHINode(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd)
+  PHINode(const Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
+          BasicBlock *InsertAtEnd)
     : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
-      ReservedSpace(0) {
+      ReservedSpace(NumReservedValues * 2) {
     setName(NameStr);
+    OperandList = allocHungoffUses(ReservedSpace);
   }
 protected:
   virtual PHINode *clone_impl() const;
 public:
-  static PHINode *Create(const Type *Ty, const Twine &NameStr = "",
+  /// Constructors - NumReservedValues is a hint for the number of incoming
+  /// edges that this phi node will have (use 0 if you really have no idea).
+  static PHINode *Create(const Type *Ty, unsigned NumReservedValues,
+                         const Twine &NameStr = "",
                          Instruction *InsertBefore = 0) {
-    return new PHINode(Ty, NameStr, InsertBefore);
+    return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore);
   }
-  static PHINode *Create(const Type *Ty, const Twine &NameStr,
-                         BasicBlock *InsertAtEnd) {
-    return new PHINode(Ty, NameStr, InsertAtEnd);
+  static PHINode *Create(const Type *Ty, unsigned NumReservedValues, 
+                         const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
   }
   ~PHINode();
 
-  /// reserveOperandSpace - This method can be used to avoid repeated
-  /// reallocation of PHI operand lists by reserving space for the correct
-  /// number of operands before adding them.  Unlike normal vector reserves,
-  /// this method can also be used to trim the operand space.
-  void reserveOperandSpace(unsigned NumValues) {
-    resizeOperands(NumValues*2);
-  }
-
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
@@ -1912,7 +1910,7 @@ public:
            "All operands to PHI node must be the same type as the PHI node!");
     unsigned OpNo = NumOperands;
     if (OpNo+2 > ReservedSpace)
-      resizeOperands(0);  // Get more space!
+      growOperands();  // Get more space!
     // Initialize some new operands.
     NumOperands = OpNo+2;
     OperandList[OpNo] = V;
@@ -1962,7 +1960,7 @@ public:
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
  private:
-  void resizeOperands(unsigned NumOperands);
+  void growOperands();
 };
 
 template <>
@@ -2154,7 +2152,7 @@ class SwitchInst : public TerminatorInst {
   // Operand[2n+1] = BasicBlock to go to on match
   SwitchInst(const SwitchInst &SI);
   void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
-  void resizeOperands(unsigned No);
+  void growOperands();
   // allocate space for exactly zero operands
   void *operator new(size_t s) {
     return User::operator new(s, 0);
@@ -2306,7 +2304,7 @@ class IndirectBrInst : public TerminatorInst {
   // Operand[2n+1] = BasicBlock to go to on match
   IndirectBrInst(const IndirectBrInst &IBI);
   void init(Value *Address, unsigned NumDests);
-  void resizeOperands(unsigned No);
+  void growOperands();
   // allocate space for exactly zero operands
   void *operator new(size_t s) {
     return User::operator new(s, 0);
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 0c9be78..a63cd6a 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -30,7 +30,7 @@ class IntrinsicProperty;
 def IntrNoMem : IntrinsicProperty;
 
 // IntrReadArgMem - This intrinsic reads only from memory that one of its
-// arguments points to, but may read an unspecified amount.
+// pointer-typed arguments points to, but may read an unspecified amount.
 def IntrReadArgMem : IntrinsicProperty;
 
 // IntrReadMem - This intrinsic reads from unspecified memory, so it cannot be
@@ -307,7 +307,7 @@ let Properties = [IntrNoMem] in {
   def int_eh_sjlj_lsda    : Intrinsic<[llvm_ptr_ty]>;
   def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
 }
-def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_ptr_ty]>;
+def int_eh_sjlj_dispatch_setup : Intrinsic<[], []>;
 def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
 
@@ -490,3 +490,4 @@ include "llvm/IntrinsicsARM.td"
 include "llvm/IntrinsicsCellSPU.td"
 include "llvm/IntrinsicsAlpha.td"
 include "llvm/IntrinsicsXCore.td"
+include "llvm/IntrinsicsPTX.td"
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index 546538a..03e9261 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -1,10 +1,10 @@
 //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file defines all of the ARM-specific intrinsics.
@@ -129,8 +129,12 @@ let Properties = [IntrNoMem, Commutative] in {
   def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
   def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
   def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
+  def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
+  def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
   def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
   def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
+
+  // Vector Multiply and Accumulate/Subtract.
   def int_arm_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
   def int_arm_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
 
diff --git a/include/llvm/IntrinsicsPTX.td b/include/llvm/IntrinsicsPTX.td
new file mode 100644
index 0000000..28379c9
--- /dev/null
+++ b/include/llvm/IntrinsicsPTX.td
@@ -0,0 +1,92 @@
+//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "ptx" in {
+  multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
+// FIXME: Do we need the 128-bit integer type version?
+//    def _r64   : Intrinsic<[llvm_i128_ty],   [], [IntrNoMem]>;
+
+// FIXME: Enable this once v4i32 support is enabled in back-end.
+//    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
+
+    def _x     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+                 GCCBuiltin<!strconcat(prefix, "_x")>;
+    def _y     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+                 GCCBuiltin<!strconcat(prefix, "_y")>;
+    def _z     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+                 GCCBuiltin<!strconcat(prefix, "_z")>;
+    def _w     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+                 GCCBuiltin<!strconcat(prefix, "_w")>;
+  }
+
+  class PTXReadSpecialRegisterIntrinsic_r32<string name>
+    : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+      GCCBuiltin<name>;
+
+  class PTXReadSpecialRegisterIntrinsic_r64<string name>
+    : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
+      GCCBuiltin<name>;
+}
+
+defm int_ptx_read_tid        : PTXReadSpecialRegisterIntrinsic_v4i32
+                               <"__builtin_ptx_read_tid">;
+defm int_ptx_read_ntid       : PTXReadSpecialRegisterIntrinsic_v4i32
+                               <"__builtin_ptx_read_ntid">;
+
+def int_ptx_read_laneid      : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_laneid">;
+def int_ptx_read_warpid      : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_warpid">;
+def int_ptx_read_nwarpid     : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_nwarpid">;
+
+defm int_ptx_read_ctaid      : PTXReadSpecialRegisterIntrinsic_v4i32
+                               <"__builtin_ptx_read_ctaid">;
+defm int_ptx_read_nctaid     : PTXReadSpecialRegisterIntrinsic_v4i32
+                               <"__builtin_ptx_read_nctaid">;
+
+def int_ptx_read_smid        : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_smid">;
+def int_ptx_read_nsmid       : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_nsmid">;
+def int_ptx_read_gridid      : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_gridid">;
+
+def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_lanemask_eq">;
+def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_lanemask_le">;
+def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_lanemask_lt">;
+def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_lanemask_ge">;
+def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_lanemask_gt">;
+
+def int_ptx_read_clock       : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_clock">;
+def int_ptx_read_clock64     : PTXReadSpecialRegisterIntrinsic_r64
+                               <"__builtin_ptx_read_clock64">;
+
+def int_ptx_read_pm0         : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_pm0">;
+def int_ptx_read_pm1         : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_pm1">;
+def int_ptx_read_pm2         : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_pm2">;
+def int_ptx_read_pm3         : PTXReadSpecialRegisterIntrinsic_r32
+                               <"__builtin_ptx_read_pm3">;
+
+let TargetPrefix = "ptx" in
+  def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
+                         GCCBuiltin<"__builtin_ptx_bar_sync">;
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 4946220..b44101a 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -18,6 +18,83 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 
 //===----------------------------------------------------------------------===//
+// 3DNow!
+
+let TargetPrefix = "x86" in {
+  def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 3DNow! extensions
+
+let TargetPrefix = "x86" in {
+  def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                        [IntrNoMem]>;
+  def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnowa_pswapd :
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
 // SSE1
 
 // Arithmetic ops
@@ -138,12 +215,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
-// SIMD load ops
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse_loadu_ps : GCCBuiltin<"__builtin_ia32_loadups">,
-              Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
 // SIMD store ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
@@ -452,14 +523,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
-// SIMD load ops
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_loadu_pd : GCCBuiltin<"__builtin_ia32_loadupd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
-  def int_x86_sse2_loadu_dq : GCCBuiltin<"__builtin_ia32_loaddqu">,
-              Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
 // SIMD store ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
@@ -921,68 +984,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 // String/text processing ops.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_sse42_pcmpistrm128  : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
-	  Intrinsic<[llvm_v16i8_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_v16i8_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistri128  : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestrm128  : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
-	  Intrinsic<[llvm_v16i8_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_v16i8_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestri128  : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
   def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
-	  Intrinsic<[llvm_i32_ty],
-		    [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
-		     llvm_i8_ty],
-		    [IntrNoMem]>;
+    Intrinsic<[llvm_i32_ty],
+        [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+         llvm_i8_ty],
+        [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1571,14 +1634,14 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
 
   def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
-              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, 
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                         llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
 
   def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
-              Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], 
+              Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
                         [IntrNoMem]>;
 
   def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
-              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, 
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 }
diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IntrinsicsXCore.td
index 944120f..e633af0 100644
--- a/include/llvm/IntrinsicsXCore.td
+++ b/include/llvm/IntrinsicsXCore.td
@@ -9,8 +9,13 @@
 //===----------------------------------------------------------------------===//
 
 let TargetPrefix = "xcore" in {  // All intrinsics start with "llvm.xcore.".
+  // Miscellaneous instructions.
   def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>;
   def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>;
+  def int_xcore_getps : Intrinsic<[llvm_i32_ty],[llvm_i32_ty]>;
+  def int_xcore_setps : Intrinsic<[],[llvm_i32_ty, llvm_i32_ty]>;
+  def int_xcore_setsr : Intrinsic<[],[llvm_i32_ty]>;
+  def int_xcore_clrsr : Intrinsic<[],[llvm_i32_ty]>;
 
   // Resource instructions.
   def int_xcore_getr : Intrinsic<[llvm_anyptr_ty],[llvm_i32_ty]>;
@@ -48,8 +53,37 @@ let TargetPrefix = "xcore" in {  // All intrinsics start with "llvm.xcore.".
   def int_xcore_setv : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty],
                                  [NoCapture<0>]>;
   def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>;
+  def int_xcore_setclk : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty],
+                                   [NoCapture<0>, NoCapture<1>]>;
+  def int_xcore_setrdy : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty],
+                                   [NoCapture<0>, NoCapture<1>]>;
+  def int_xcore_setpsc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                   [NoCapture<0>]>;
 
   // Intrinsics for events.
   def int_xcore_waitevent : Intrinsic<[llvm_ptr_ty],[], [IntrReadMem]>;
+
+  // If any of the resources owned by the thread are ready this returns the
+  // vector of one of the ready resources. If no resources owned by the thread
+  // are ready then the operand passed to the intrinsic is returned.
+  def int_xcore_checkevent : Intrinsic<[llvm_ptr_ty],[llvm_ptr_ty]>;
+
   def int_xcore_clre : Intrinsic<[],[],[]>;
+
+  // Intrinsics for threads.
+  def int_xcore_getst : Intrinsic <[llvm_anyptr_ty],[llvm_anyptr_ty],
+                                   [NoCapture<0>]>;
+  def int_xcore_msync : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>;
+  def int_xcore_ssync : Intrinsic <[],[]>;
+  def int_xcore_mjoin : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>;
+  def int_xcore_initsp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty],
+                                    [NoCapture<0>]>;
+  def int_xcore_initpc : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty],
+                                    [NoCapture<0>]>;
+  def int_xcore_initlr : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty],
+                                    [NoCapture<0>]>;
+  def int_xcore_initcp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty],
+                                    [NoCapture<0>]>;
+  def int_xcore_initdp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty],
+                                    [NoCapture<0>]>;
 }
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 69e1bd9..88ee65a 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -49,7 +49,6 @@ namespace {
       (void) llvm::createAliasAnalysisCounterPass();
       (void) llvm::createAliasDebugger();
       (void) llvm::createArgumentPromotionPass();
-      (void) llvm::createStructRetPromotionPass();
       (void) llvm::createBasicAliasAnalysisPass();
       (void) llvm::createLibCallAliasAnalysisPass(0);
       (void) llvm::createScalarEvolutionAliasAnalysisPass();
@@ -71,6 +70,7 @@ namespace {
       (void) llvm::createEdgeProfilerPass();
       (void) llvm::createOptimalEdgeProfilerPass();
       (void) llvm::createPathProfilerPass();
+      (void) llvm::createGCOVProfilerPass(true, true);
       (void) llvm::createFunctionInliningPass();
       (void) llvm::createAlwaysInlinerPass();
       (void) llvm::createGlobalDCEPass();
@@ -84,7 +84,6 @@ namespace {
       (void) llvm::createLCSSAPass();
       (void) llvm::createLICMPass();
       (void) llvm::createLazyValueInfoPass();
-      (void) llvm::createLiveValuesPass();
       (void) llvm::createLoopDependenceAnalysisPass();
       (void) llvm::createLoopExtractorPass();
       (void) llvm::createLoopSimplifyPass();
@@ -119,7 +118,6 @@ namespace {
       (void) llvm::createSCCPPass();
       (void) llvm::createScalarReplAggregatesPass();
       (void) llvm::createSimplifyLibCallsPass();
-      (void) llvm::createSimplifyHalfPowrLibCallsPass();
       (void) llvm::createSingleLoopExtractorPass();
       (void) llvm::createStripSymbolsPass();
       (void) llvm::createStripNonDebugSymbolsPass();
@@ -136,7 +134,6 @@ namespace {
       (void) llvm::createMemCpyOptPass();
       (void) llvm::createLoopDeletionPass();
       (void) llvm::createPostDomTree();
-      (void) llvm::createPostDomFrontier();
       (void) llvm::createInstructionNamerPass();
       (void) llvm::createFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
@@ -145,7 +142,6 @@ namespace {
       (void) llvm::createDbgInfoPrinterPass();
       (void) llvm::createModuleDebugInfoPrinterPass();
       (void) llvm::createPartialInliningPass();
-      (void) llvm::createGEPSplitterPass();
       (void) llvm::createLintPass();
       (void) llvm::createSinkingPass();
       (void) llvm::createLowerAtomicPass();
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 0bf364a..8733161 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -20,13 +20,16 @@
 #include <cassert>
 
 namespace llvm {
+  class MCExpr;
   class MCSection;
+  class MCStreamer;
+  class MCSymbol;
   class MCContext;
 
   /// MCAsmInfo - This class is intended to be used as a base class for asm
   /// properties and features specific to the target.
   namespace ExceptionHandling {
-    enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj };
+    enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj, ARM };
   }
 
   class MCAsmInfo {
@@ -66,10 +69,9 @@ namespace llvm {
     /// relative expressions.
     const char *PCSymbol;                    // Defaults to "$".
 
-    /// SeparatorChar - This character, if specified, is used to separate
-    /// instructions from each other when on the same line.  This is used to
-    /// measure inline asm instructions.
-    char SeparatorChar;                      // Defaults to ';'
+    /// SeparatorString - This string, if specified, is used to separate
+    /// instructions from each other when on the same line.
+    const char *SeparatorString;             // Defaults to ';'
 
     /// CommentColumn - This indicates the comment num (zero-based) at
     /// which asm comments should be printed.
@@ -322,6 +324,16 @@ namespace llvm {
       return 0;
     }
 
+    virtual const MCExpr *
+    getExprForPersonalitySymbol(const MCSymbol *Sym,
+                                unsigned Encoding,
+                                MCStreamer &Streamer) const;
+
+    const MCExpr *
+    getExprForFDESymbol(const MCSymbol *Sym,
+                        unsigned Encoding,
+                        MCStreamer &Streamer) const;
+
     bool usesSunStyleELFSectionSwitchSyntax() const {
       return SunStyleELFSectionSwitchSyntax;
     }
@@ -350,8 +362,8 @@ namespace llvm {
     const char *getPCSymbol() const {
       return PCSymbol;
     }
-    char getSeparatorChar() const {
-      return SeparatorChar;
+    const char *getSeparatorString() const {
+      return SeparatorString;
     }
     unsigned getCommentColumn() const {
       return CommentColumn;
@@ -451,7 +463,8 @@ namespace llvm {
     bool isExceptionHandlingDwarf() const {
       return
         (ExceptionsType == ExceptionHandling::DwarfTable ||
-         ExceptionsType == ExceptionHandling::DwarfCFI);
+         ExceptionsType == ExceptionHandling::DwarfCFI ||
+         ExceptionsType == ExceptionHandling::ARM);
     }
 
     bool doesDwarfRequireFrameSection() const {
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index 01cb000..a4585d1 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -36,8 +36,8 @@ private:
   /// List of sections in layout order.
   llvm::SmallVector<MCSectionData*, 16> SectionOrder;
 
-  /// The last fragment which was layed out, or 0 if nothing has been layed
-  /// out. Fragments are always layed out in order, so all fragments with a
+  /// The last fragment which was laid out, or 0 if nothing has been laid
+  /// out. Fragments are always laid out in order, so all fragments with a
   /// lower ordinal will be up to date.
   mutable DenseMap<const MCSectionData*, MCFragment *> LastValidFragment;
 
@@ -58,7 +58,7 @@ public:
   void Invalidate(MCFragment *F);
 
   /// \brief Perform layout for a single fragment, assuming that the previous
-  /// fragment has already been layed out correctly, and the parent section has
+  /// fragment has already been laid out correctly, and the parent section has
   /// been initialized.
   void LayoutFragment(MCFragment *Fragment);
 
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 30971c6..fc91966 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -706,7 +706,7 @@ private:
   /// \param DF The fragment the fixup is inside.
   /// \param Target [out] On return, the relocatable expression the fixup
   /// evaluates to.
-  /// \param Value [out] On return, the value of the fixup as currently layed
+  /// \param Value [out] On return, the value of the fixup as currently laid
   /// out.
   /// \return Whether the fixup value was fully resolved. This is true if the
   /// \arg Value result is fixed, otherwise the value may change due to
@@ -745,7 +745,7 @@ private:
                        MCFragment &F, const MCFixup &Fixup);
 
 public:
-  /// Compute the effective fragment size assuming it is layed out at the given
+  /// Compute the effective fragment size assuming it is laid out at the given
   /// \arg SectionAddress and \arg FragmentOffset.
   uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const;
 
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 7b26d54..070089e 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -45,12 +45,18 @@ namespace llvm {
 
     const TargetAsmInfo *TAI;
 
+    /// Allocator - Allocator object used for creating machine code objects.
+    ///
+    /// We use a bump pointer allocator to avoid the need to track all allocated
+    /// objects.
+    BumpPtrAllocator Allocator;
+
     /// Symbols - Bindings of names to symbols.
-    StringMap<MCSymbol*> Symbols;
+    StringMap<MCSymbol*, BumpPtrAllocator&> Symbols;
 
     /// UsedNames - Keeps tracks of names that were used both for used declared
     /// and artificial symbols.
-    StringMap<bool> UsedNames;
+    StringMap<bool, BumpPtrAllocator&> UsedNames;
 
     /// NextUniqueID - The next ID to dole out to an unnamed assembler temporary
     /// symbol.
@@ -84,6 +90,11 @@ namespace llvm {
     MCDwarfLoc CurrentDwarfLoc;
     bool DwarfLocSeen;
 
+    /// Honor temporary labels, this is useful for debugging semantic
+    /// differences between temporary and non-temporary labels (primarily on
+    /// Darwin).
+    bool AllowTemporaryLabels;
+
     /// The dwarf line information from the .loc directives for the sections
     /// with assembled machine instructions have after seeing .loc directives.
     DenseMap<const MCSection *, MCLineSection *> MCLineSections;
@@ -91,12 +102,6 @@ namespace llvm {
     /// the elements were added.
     std::vector<const MCSection *> MCLineSectionOrder;
 
-    /// Allocator - Allocator object used for creating machine code objects.
-    ///
-    /// We use a bump pointer allocator to avoid the need to track all allocated
-    /// objects.
-    BumpPtrAllocator Allocator;
-
     void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
 
     MCSymbol *CreateSymbol(StringRef Name);
@@ -109,6 +114,8 @@ namespace llvm {
 
     const TargetAsmInfo &getTargetAsmInfo() const { return *TAI; }
 
+    void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
+
     /// @name Symbol Management
     /// @{
 
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index c9e42eb..ce8759a 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -10,12 +10,14 @@
 #define MCDISASSEMBLER_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm-c/Disassembler.h"
 
 namespace llvm {
   
 class MCInst;
 class MemoryObject;
 class raw_ostream;
+class MCContext;
   
 struct EDInstInfo;
 
@@ -24,7 +26,7 @@ struct EDInstInfo;
 class MCDisassembler {
 public:
   /// Constructor     - Performs initial setup for the disassembler.
-  MCDisassembler() {}
+  MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {}
   
   virtual ~MCDisassembler();
   
@@ -46,13 +48,37 @@ public:
                                        uint64_t address,
                                        raw_ostream &vStream) const = 0;
 
-  /// getEDInfo - Returns the enhanced insturction information corresponding to
+  /// getEDInfo - Returns the enhanced instruction information corresponding to
   ///   the disassembler.
   ///
   /// @return         - An array of instruction information, with one entry for
   ///                   each MCInst opcode this disassembler returns.
   ///                   NULL if there is no info for this target.
   virtual EDInstInfo   *getEDInfo() const { return (EDInstInfo*)0; }
+
+private:
+  //
+  // Hooks for symbolic disassembly via the public 'C' interface.
+  //
+  // The function to get the symbolic information for operands.
+  LLVMOpInfoCallback GetOpInfo;
+  // The pointer to the block of symbolic information for above call back.
+  void *DisInfo;
+  // The assembly context for creating symbols and MCExprs in place of
+  // immediate operands when there is symbolic information.
+  MCContext *Ctx;
+
+public:
+  void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
+                                   void *disInfo,
+                                   MCContext *ctx) {
+    GetOpInfo = getOpInfo;
+    DisInfo = disInfo;
+    Ctx = ctx;
+  }
+  LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; }
+  void *getDisInfoBlock() const { return DisInfo; }
+  MCContext *getMCContext() const { return Ctx; }
 };
 
 } // namespace llvm
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 07a7bad..3bbcf3e 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -23,6 +23,7 @@
 #include <vector>
 
 namespace llvm {
+  class TargetAsmInfo;
   class MachineMove;
   class MCContext;
   class MCExpr;
@@ -230,7 +231,7 @@ namespace llvm {
 
   class MCCFIInstruction {
   public:
-    enum OpType { Remember, Restore, Move };
+    enum OpType { SameValue, Remember, Restore, Move, RelMove };
   private:
     OpType Operation;
     MCSymbol *Label;
@@ -242,10 +243,19 @@ namespace llvm {
       : Operation(Op), Label(L) {
       assert(Op == Remember || Op == Restore);
     }
+    MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register)
+      : Operation(Op), Label(L), Destination(Register) {
+      assert(Op == SameValue);
+    }
     MCCFIInstruction(MCSymbol *L, const MachineLocation &D,
                      const MachineLocation &S)
       : Operation(Move), Label(L), Destination(D), Source(S) {
     }
+    MCCFIInstruction(OpType Op, MCSymbol *L, const MachineLocation &D,
+                     const MachineLocation &S)
+      : Operation(Op), Label(L), Destination(D), Source(S) {
+      assert(Op == RelMove);
+    }
     OpType getOperation() const { return Operation; }
     MCSymbol *getLabel() const { return Label; }
     const MachineLocation &getDestination() const { return Destination; }
@@ -254,12 +264,13 @@ namespace llvm {
 
   struct MCDwarfFrameInfo {
     MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0),
-                         Instructions(), PersonalityEncoding(0),
+                         Function(0), Instructions(), PersonalityEncoding(),
                          LsdaEncoding(0) {}
     MCSymbol *Begin;
     MCSymbol *End;
     const MCSymbol *Personality;
     const MCSymbol *Lsda;
+    const MCSymbol *Function;
     std::vector<MCCFIInstruction> Instructions;
     unsigned PersonalityEncoding;
     unsigned LsdaEncoding;
@@ -270,9 +281,11 @@ namespace llvm {
     //
     // This emits the frame info section.
     //
-    static void Emit(MCStreamer &streamer);
+    static void Emit(MCStreamer &streamer, bool usingCFI);
+    static void EmitDarwin(MCStreamer &streamer, bool usingCFI);
     static void EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta);
-    static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS);
+    static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS,
+                                 const TargetAsmInfo &AsmInfo);
   };
 } // end namespace llvm
 
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index fea5249..521fde6 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -19,6 +19,7 @@ class MCAsmInfo;
 class MCAsmLayout;
 class MCAssembler;
 class MCContext;
+class MCSection;
 class MCSectionData;
 class MCSymbol;
 class MCValue;
@@ -92,6 +93,12 @@ public:
   /// @result - True on success.
   bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout &Layout) const;
 
+  /// FindAssociatedSection - Find the "associated section" for this expression,
+  /// which is currently defined as the absolute section for constants, or
+  /// otherwise the section associated with the first defined symbol in the
+  /// expression.
+  const MCSection *FindAssociatedSection() const;
+
   /// @}
 
   static bool classof(const MCExpr *) { return true; }
@@ -420,6 +427,7 @@ public:
   virtual bool EvaluateAsRelocatableImpl(MCValue &Res,
                                          const MCAsmLayout *Layout) const = 0;
   virtual void AddValueSymbols(MCAssembler *) const = 0;
+  virtual const MCSection *FindAssociatedSection() const = 0;
 
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 96716c7..0669558 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -25,9 +25,12 @@ protected:
   /// assembly emission is disable.
   raw_ostream *CommentStream;
   const MCAsmInfo &MAI;
+
+  /// The current set of available features.
+  unsigned AvailableFeatures;
 public:
   MCInstPrinter(const MCAsmInfo &mai)
-    : CommentStream(0), MAI(mai) {}
+    : CommentStream(0), MAI(mai), AvailableFeatures(0) {}
 
   virtual ~MCInstPrinter();
 
@@ -41,6 +44,12 @@ public:
   /// getOpcodeName - Return the name of the specified opcode enum (e.g.
   /// "MOV32ri") or empty if we can't resolve it.
   virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+  /// getRegName - Return the assembler register name.
+  virtual StringRef getRegName(unsigned RegNo) const;
+
+  unsigned getAvailableFeatures() const { return AvailableFeatures; }
+  void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
 };
 
 } // namespace llvm
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 833341e..8b0d87a 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -38,6 +38,9 @@ class MCObjectStreamer : public MCStreamer {
 protected:
   MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
                    raw_ostream &_OS, MCCodeEmitter *_Emitter);
+  MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                   raw_ostream &_OS, MCCodeEmitter *_Emitter,
+                   MCAssembler *_Assembler);
   ~MCObjectStreamer();
 
   MCSectionData *getCurrentSectionData() const {
@@ -60,9 +63,9 @@ public:
 
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             bool isPCRel, unsigned AddrSpace);
-  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
-  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+                             unsigned AddrSpace);
+  virtual void EmitULEB128Value(const MCExpr *Value);
+  virtual void EmitSLEB128Value(const MCExpr *Value);
   virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
   virtual void ChangeSection(const MCSection *Section);
   virtual void EmitInstruction(const MCInst &Inst);
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 252696b..ab78799 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -49,6 +49,7 @@ public:
   virtual StringRef LexUntilEndOfStatement();
 
   bool isAtStartOfComment(char Char);
+  bool isAtStatementSeparator(const char *Ptr);
 
   const MCAsmInfo &getMAI() const { return MAI; }
 
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 1c01b2f..5700817 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_MC_MCSECTION_H
 #define LLVM_MC_MCSECTION_H
 
-#include <string>
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Casting.h"
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 7633515..bdb17e9 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -66,10 +66,10 @@ public:
     /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
     /// the Reserved2 field.
     S_SYMBOL_STUBS               = 0x08U,
-    /// S_SYMBOL_STUBS - Section with only function pointers for
+    /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
     /// initialization.
     S_MOD_INIT_FUNC_POINTERS     = 0x09U,
-    /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
+    /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
     /// termination.
     S_MOD_TERM_FUNC_POINTERS     = 0x0AU,
     /// S_COALESCED - Section contains symbols that are to be coalesced.
@@ -157,10 +157,12 @@ public:
   /// flavored .s file.  If successful, this fills in the specified Out
   /// parameters and returns an empty string.  When an invalid section
   /// specifier is present, this returns a string indicating the problem.
+  /// If no TAA was parsed, TAA is not altered, and TAAWasSet becomes false.
   static std::string ParseSectionSpecifier(StringRef Spec,       // In.
                                            StringRef &Segment,   // Out.
                                            StringRef &Section,   // Out.
                                            unsigned  &TAA,       // Out.
+                                           bool      &TAAParsed, // Out.
                                            unsigned  &StubSize); // Out.
 
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 4451199..b005c8b 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -50,13 +50,12 @@ namespace llvm {
     MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT
     MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT
 
-    void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
-                         bool isPCRel, unsigned AddrSpace);
-
     std::vector<MCDwarfFrameInfo> FrameInfos;
     MCDwarfFrameInfo *getCurrentFrameInfo();
     void EnsureValidFrame();
 
+    const MCSymbol* LastNonPrivate;
+
     /// SectionStack - This is stack of current and previous section
     /// values saved by PushSection.
     SmallVector<std::pair<const MCSection *,
@@ -65,6 +64,12 @@ namespace llvm {
   protected:
     MCStreamer(MCContext &Ctx);
 
+    const MCExpr *BuildSymbolDiff(MCContext &Context, const MCSymbol *A,
+                                  const MCSymbol *B);
+
+    const MCExpr *ForceExpAbs(MCStreamer *Streamer, MCContext &Context,
+                              const MCExpr* Expr);
+
   public:
     virtual ~MCStreamer();
 
@@ -180,7 +185,10 @@ namespace llvm {
     /// @param Symbol - The symbol to emit. A given symbol should only be
     /// emitted as a label once, and symbols emitted as a label should never be
     /// used in an assignment.
-    virtual void EmitLabel(MCSymbol *Symbol) = 0;
+    virtual void EmitLabel(MCSymbol *Symbol);
+
+    virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+                                     MCSymbol *EHSymbol);
 
     /// EmitAssemblerFlag - Note in the output the specified @p Flag
     virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0;
@@ -300,13 +308,10 @@ namespace llvm {
     /// @param Size - The size of the integer (in bytes) to emit. This must
     /// match a native machine width.
     virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                               bool isPCRel, unsigned AddrSpace) = 0;
+                               unsigned AddrSpace) = 0;
 
     void EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace = 0);
 
-    void EmitPCRelValue(const MCExpr *Value, unsigned Size,
-                        unsigned AddrSpace = 0);
-
     /// EmitIntValue - Special case of EmitValue that avoids the client having
     /// to pass in a MCExpr for constant integers.
     virtual void EmitIntValue(uint64_t Value, unsigned Size,
@@ -319,11 +324,9 @@ namespace llvm {
     void EmitAbsValue(const MCExpr *Value, unsigned Size,
                       unsigned AddrSpace = 0);
 
-    virtual void EmitULEB128Value(const MCExpr *Value,
-                                  unsigned AddrSpace = 0) = 0;
+    virtual void EmitULEB128Value(const MCExpr *Value) = 0;
 
-    virtual void EmitSLEB128Value(const MCExpr *Value,
-                                  unsigned AddrSpace = 0) = 0;
+    virtual void EmitSLEB128Value(const MCExpr *Value) = 0;
 
     /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
     /// client having to pass in a MCExpr for constant integers.
@@ -338,9 +341,6 @@ namespace llvm {
     void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
                          unsigned AddrSpace = 0);
 
-    void EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
-                              unsigned AddrSpace = 0);
-
     /// EmitGPRel32Value - Emit the expression @p Value into the output as a
     /// gprel32 (32-bit GP relative) value.
     ///
@@ -422,7 +422,8 @@ namespace llvm {
     virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                        unsigned Column, unsigned Flags,
                                        unsigned Isa,
-                                       unsigned Discriminator);
+                                       unsigned Discriminator,
+                                       StringRef FileName);
 
     virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                           const MCSymbol *LastLabel,
@@ -435,17 +436,19 @@ namespace llvm {
     void EmitDwarfSetLineAddr(int64_t LineDelta, const MCSymbol *Label,
                               int PointerSize);
 
-    virtual bool EmitCFIStartProc();
-    virtual bool EmitCFIEndProc();
-    virtual bool EmitCFIDefCfa(int64_t Register, int64_t Offset);
-    virtual bool EmitCFIDefCfaOffset(int64_t Offset);
-    virtual bool EmitCFIDefCfaRegister(int64_t Register);
-    virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
-    virtual bool EmitCFIPersonality(const MCSymbol *Sym,
-                                    unsigned Encoding);
-    virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
-    virtual bool EmitCFIRememberState();
-    virtual bool EmitCFIRestoreState();
+    virtual void EmitCFIStartProc();
+    virtual void EmitCFIEndProc();
+    virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
+    virtual void EmitCFIDefCfaOffset(int64_t Offset);
+    virtual void EmitCFIDefCfaRegister(int64_t Register);
+    virtual void EmitCFIOffset(int64_t Register, int64_t Offset);
+    virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+    virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+    virtual void EmitCFIRememberState();
+    virtual void EmitCFIRestoreState();
+    virtual void EmitCFISameValue(int64_t Register);
+    virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
+    virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
 
     /// EmitInstruction - Emit the given @p Instruction into the current
     /// section.
@@ -457,6 +460,19 @@ namespace llvm {
     virtual void EmitRawText(StringRef String);
     void EmitRawText(const Twine &String);
 
+    /// ARM-related methods.
+    /// FIXME: Eventually we should have some "target MC streamer" and move
+    /// these methods there.
+    virtual void EmitFnStart();
+    virtual void EmitFnEnd();
+    virtual void EmitCantUnwind();
+    virtual void EmitPersonality(const MCSymbol *Personality);
+    virtual void EmitHandlerData();
+    virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+    virtual void EmitPad(int64_t Offset);
+    virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+                             bool isVector);
+
     /// Finish - Finish emission of machine code.
     virtual void Finish() = 0;
   };
@@ -485,6 +501,7 @@ namespace llvm {
   MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                 bool isVerboseAsm,
                                 bool useLoc,
+                                bool useCFI,
                                 MCInstPrinter *InstPrint = 0,
                                 MCCodeEmitter *CE = 0,
                                 TargetAsmBackend *TAB = 0,
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 7da4d7c..0583ce5 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -56,6 +56,7 @@ namespace llvm {
     mutable unsigned IsUsed : 1;
 
   private:  // MCContext creates and uniques these.
+    friend class MCExpr;
     friend class MCContext;
     MCSymbol(StringRef name, bool isTemporary)
       : Name(name), Section(0), Value(0),
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index a6c3f03..c323025 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -17,6 +17,7 @@
 #define LLVM_METADATA_H
 
 #include "llvm/Value.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ilist_node.h"
 
@@ -110,28 +111,25 @@ class MDNode : public Value, public FoldingSetNode {
   void replaceOperand(MDNodeOperand *Op, Value *NewVal);
   ~MDNode();
 
-  MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
-         bool isFunctionLocal);
+  MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal);
   
-  static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
+  static MDNode *getMDNode(LLVMContext &C, ArrayRef<Value*> Vals,
                            FunctionLocalness FL, bool Insert = true);
 public:
   // Constructors and destructors.
-  static MDNode *get(LLVMContext &Context, Value *const *Vals,
-                     unsigned NumVals);
+  static MDNode *get(LLVMContext &Context, ArrayRef<Value*> Vals);
   // getWhenValsUnresolved - Construct MDNode determining function-localness
   // from isFunctionLocal argument, not by analyzing Vals.
-  static MDNode *getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals,
-                                       unsigned NumVals, bool isFunctionLocal);
+  static MDNode *getWhenValsUnresolved(LLVMContext &Context,
+                                       ArrayRef<Value*> Vals,
+                                       bool isFunctionLocal);
                                        
-  static MDNode *getIfExists(LLVMContext &Context, Value *const *Vals,
-                             unsigned NumVals);
+  static MDNode *getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals);
 
   /// getTemporary - Return a temporary MDNode, for use in constructing
   /// cyclic MDNode structures. A temporary MDNode is not uniqued,
   /// may be RAUW'd, and must be manually deleted with deleteTemporary.
-  static MDNode *getTemporary(LLVMContext &Context, Value *const *Vals,
-                              unsigned NumVals);
+  static MDNode *getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals);
 
   /// deleteTemporary - Deallocate a node created by getTemporary. The
   /// node must not have any users.
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index f95895e..aef8eb8 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -211,15 +211,20 @@ public:
   void setTargetTriple(StringRef T) { TargetTriple = T; }
 
   /// Set the module-scope inline assembly blocks.
-  void setModuleInlineAsm(StringRef Asm) { GlobalScopeAsm = Asm; }
+  void setModuleInlineAsm(StringRef Asm) {
+    GlobalScopeAsm = Asm;
+    if (!GlobalScopeAsm.empty() &&
+        GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
+      GlobalScopeAsm += '\n';
+  }
 
   /// Append to the module-scope inline assembly blocks, automatically inserting
   /// a separating newline if necessary.
   void appendModuleInlineAsm(StringRef Asm) {
+    GlobalScopeAsm += Asm;
     if (!GlobalScopeAsm.empty() &&
         GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
       GlobalScopeAsm += '\n';
-    GlobalScopeAsm += Asm;
   }
 
 /// @}
@@ -303,7 +308,7 @@ public:
   ///   1. If it does not exist, add a declaration of the global and return it.
   ///   2. Else, the global exists but has the wrong type: return the function
   ///      with a constantexpr cast to the right type.
-  ///   3. Finally, if the existing global is the correct delclaration, return
+  ///   3. Finally, if the existing global is the correct declaration, return
   ///      the existing global.
   Constant *getOrInsertGlobal(StringRef Name, const Type *Ty);
 
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
index 03d9c14..19a399e 100644
--- a/include/llvm/Object/MachOObject.h
+++ b/include/llvm/Object/MachOObject.h
@@ -19,6 +19,7 @@
 namespace llvm {
 
 class MemoryBuffer;
+class raw_ostream;
 
 namespace object {
 
@@ -172,7 +173,26 @@ public:
     InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
 
   /// @}
+  
+  /// @name Object Dump Facilities
+  /// @{
+  /// dump - Support for debugging, callable in GDB: V->dump()
+  //
+  void dump() const;
+  void dumpHeader() const;
+  
+  /// print - Implement operator<< on Value.
+  ///
+  void print(raw_ostream &O) const;
+  void printHeader(raw_ostream &O) const;
+
+  /// @}
 };
+  
+inline raw_ostream &operator<<(raw_ostream &OS, const MachOObject &V) {
+  V.print(OS);
+  return OS;
+}
 
 } // end namespace object
 } // end namespace llvm
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index ed0fb39..04dd8b6 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -13,7 +13,7 @@
 // Passes are designed this way so that it is possible to run passes in a cache
 // and organizationally optimal order without having to specify it at the front
 // end.  This allows arbitrary passes to be strung together and have them
-// executed as effeciently as possible.
+// executed as efficiently as possible.
 //
 // Passes should extend one of the classes below, depending on the guarantees
 // that it can make about what will be modified as it is run.  For example, most
@@ -114,7 +114,7 @@ public:
   void dump() const; // dump - Print to stderr.
 
   /// createPrinterPass - Get a Pass appropriate to print the IR this
-  /// pass operates one (Module, Function or MachineFunction).
+  /// pass operates on (Module, Function or MachineFunction).
   virtual Pass *createPrinterPass(raw_ostream &O,
                                   const std::string &Banner) const = 0;
 
@@ -320,7 +320,7 @@ class BasicBlockPass : public Pass {
 public:
   explicit BasicBlockPass(char &pid) : Pass(PT_BasicBlock, pid) {}
 
-  /// createPrinterPass - Get a function printer pass.
+  /// createPrinterPass - Get a basic block printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
   /// doInitialization - Virtual method overridden by subclasses to do
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index a3342d5..fede121 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -142,6 +142,8 @@ public:
   Pass *findImplPass(Pass *P, AnalysisID PI, Function &F);
 
   void addAnalysisImplsPair(AnalysisID PI, Pass *P) {
+    if (findImplPass(PI) == P)
+      return;
     std::pair<AnalysisID, Pass*> pir = std::make_pair(PI,P);
     AnalysisImpls.push_back(pir);
   }
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index c680709..a2ad24f 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -177,6 +177,9 @@ public:
   unsigned GetNumSlabs() const;
 
   void PrintStats() const;
+  
+  /// Compute the total physical memory allocated by this allocator.
+  size_t getTotalMemory() const;
 };
 
 /// SpecificBumpPtrAllocator - Same as BumpPtrAllocator but allows only
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 9ba71fc..d2ea123 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -41,6 +41,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
 public:
   typedef typename super::pointer pointer;
 
+  PredIterator() {}
   explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) {
     advancePastNonTerminators();
   }
@@ -64,6 +65,12 @@ public:
   inline Self operator++(int) { // Postincrement
     Self tmp = *this; ++*this; return tmp;
   }
+
+  /// getOperandNo - Return the operand number in the predecessor's
+  /// terminator of the successor.
+  unsigned getOperandNo() const {
+    return It.getOperandNo();
+  }
 };
 
 typedef PredIterator<BasicBlock, Value::use_iterator> pred_iterator;
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 6bb9806..abb5a9a 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -192,8 +192,8 @@ template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
 
 // cast<X> - Return the argument parameter cast to the specified type.  This
 // casting operator asserts that the type is correct, so it does not return null
-// on failure.  But it will correctly return NULL when the input is NULL.
-// Used Like this:
+// on failure.  It does not allow a null argument (use cast_or_null for that).
+// It is typically used like this:
 //
 //  cast<Instruction>(myVal)->getParent()
 //
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 9ae3d6a..d609871 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -60,6 +60,12 @@ void ParseEnvironmentOptions(const char *progName, const char *envvar,
 void SetVersionPrinter(void (*func)());
 
 
+// PrintOptionValues - Print option values.
+// With -print-options print the difference between option values and defaults.
+// With -print-all-options print all option values.
+// (Currently not perfect, but best-effort.)
+void PrintOptionValues();
+
 // MarkOptionsChanged - Internal helper function.
 void MarkOptionsChanged();
 
@@ -230,6 +236,8 @@ public:
   //
   virtual void printOptionInfo(size_t GlobalWidth) const = 0;
 
+  virtual void printOptionValue(size_t GlobalWidth, bool Force) const = 0;
+
   virtual void getExtraOptionNames(SmallVectorImpl<const char*> &) {}
 
   // addOccurrence - Wrapper around handleOccurrence that enforces Flags.
@@ -303,6 +311,120 @@ LocationClass<Ty> location(Ty &L) { return LocationClass<Ty>(L); }
 
 
 //===----------------------------------------------------------------------===//
+// OptionValue class
+
+// Support value comparison outside the template.
+struct GenericOptionValue {
+  virtual ~GenericOptionValue() {}
+  virtual bool compare(const GenericOptionValue &V) const = 0;
+};
+
+template<class DataType> struct OptionValue;
+
+// The default value safely does nothing. Option value printing is only
+// best-effort.
+template<class DataType, bool isClass>
+struct OptionValueBase : public GenericOptionValue {
+  // Temporary storage for argument passing.
+  typedef OptionValue<DataType> WrapperType;
+
+  bool hasValue() const { return false; }
+
+  const DataType &getValue() const { assert(false && "no default value"); }
+
+  // Some options may take their value from a different data type.
+  template<class DT>
+  void setValue(const DT& /*V*/) {}
+
+  bool compare(const DataType &/*V*/) const { return false; }
+
+  virtual bool compare(const GenericOptionValue& /*V*/) const { return false; }
+};
+
+// Simple copy of the option value.
+template<class DataType>
+class OptionValueCopy : public GenericOptionValue {
+  DataType Value;
+  bool Valid;
+public:
+  OptionValueCopy() : Valid(false) {}
+
+  bool hasValue() const { return Valid; }
+
+  const DataType &getValue() const {
+    assert(Valid && "invalid option value");
+    return Value;
+  }
+
+  void setValue(const DataType &V) { Valid = true; Value = V; }
+
+  bool compare(const DataType &V) const {
+    return Valid && (Value != V);
+  }
+
+  virtual bool compare(const GenericOptionValue &V) const {
+    const OptionValueCopy<DataType> &VC =
+      static_cast< const OptionValueCopy<DataType>& >(V);
+    if (!VC.hasValue()) return false;
+    return compare(VC.getValue());
+  }
+};
+
+// Non-class option values.
+template<class DataType>
+struct OptionValueBase<DataType, false> : OptionValueCopy<DataType> {
+  typedef DataType WrapperType;
+};
+
+// Top-level option class.
+template<class DataType>
+struct OptionValue : OptionValueBase<DataType, is_class<DataType>::value> {
+  OptionValue() {}
+
+  OptionValue(const DataType& V) {
+    this->setValue(V);
+  }
+  // Some options may take their value from a different data type.
+  template<class DT>
+  OptionValue<DataType> &operator=(const DT& V) {
+    this->setValue(V);
+    return *this;
+  }
+};
+
+// Other safe-to-copy-by-value common option types.
+enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };
+template<>
+struct OptionValue<cl::boolOrDefault> : OptionValueCopy<cl::boolOrDefault> {
+  typedef cl::boolOrDefault WrapperType;
+
+  OptionValue() {}
+
+  OptionValue(const cl::boolOrDefault& V) {
+    this->setValue(V);
+  }
+  OptionValue<cl::boolOrDefault> &operator=(const cl::boolOrDefault& V) {
+    setValue(V);
+    return *this;
+  }
+};
+
+template<>
+struct OptionValue<std::string> : OptionValueCopy<std::string> {
+  typedef StringRef WrapperType;
+
+  OptionValue() {}
+
+  OptionValue(const std::string& V) {
+    this->setValue(V);
+  }
+  OptionValue<std::string> &operator=(const std::string& V) {
+    setValue(V);
+    return *this;
+  }
+};
+
+//===----------------------------------------------------------------------===//
 // Enum valued command line option
 //
 #define clEnumVal(ENUMVAL, DESC) #ENUMVAL, int(ENUMVAL), DESC
@@ -355,7 +477,6 @@ ValuesClass<DataType> END_WITH_NULL values(const char *Arg, DataType Val,
     return Vals;
 }
 
-
 //===----------------------------------------------------------------------===//
 // parser class - Parameterizable parser for different data types.  By default,
 // known data types (string, int, bool) have specialized parsers, that do what
@@ -368,7 +489,16 @@ ValuesClass<DataType> END_WITH_NULL values(const char *Arg, DataType Val,
 // not need replicated for every instance of the generic parser.  This also
 // allows us to put stuff into CommandLine.cpp
 //
-struct generic_parser_base {
+class generic_parser_base {
+protected:
+  class GenericOptionInfo {
+  public:
+    GenericOptionInfo(const char *name, const char *helpStr) :
+      Name(name), HelpStr(helpStr) {}
+    const char *Name;
+    const char *HelpStr;
+  };
+public:
   virtual ~generic_parser_base() {}  // Base class should have virtual-dtor
 
   // getNumOptions - Virtual function implemented by generic subclass to
@@ -385,11 +515,28 @@ struct generic_parser_base {
   // Return the width of the option tag for printing...
   virtual size_t getOptionWidth(const Option &O) const;
 
+  virtual const GenericOptionValue &getOptionValue(unsigned N) const = 0;
+
   // printOptionInfo - Print out information about this option.  The
   // to-be-maintained width is specified.
   //
   virtual void printOptionInfo(const Option &O, size_t GlobalWidth) const;
 
+  void printGenericOptionDiff(const Option &O, const GenericOptionValue &V,
+                              const GenericOptionValue &Default,
+                              size_t GlobalWidth) const;
+
+  // printOptionDiff - print the value of an option and it's default.
+  //
+  // Template definition ensures that the option and default have the same
+  // DataType (via the same AnyOptionValue).
+  template<class AnyOptionValue>
+  void printOptionDiff(const Option &O, const AnyOptionValue &V,
+                       const AnyOptionValue &Default,
+                       size_t GlobalWidth) const {
+    printGenericOptionDiff(O, V, Default, GlobalWidth);
+  }
+
   void initialize(Option &O) {
     // All of the modifiers for the option have been processed by now, so the
     // argstr field should be stable, copy it down now.
@@ -443,13 +590,11 @@ protected:
 template <class DataType>
 class parser : public generic_parser_base {
 protected:
-  class OptionInfo {
+  class OptionInfo : public GenericOptionInfo {
   public:
     OptionInfo(const char *name, DataType v, const char *helpStr) :
-      Name(name), V(v), HelpStr(helpStr) {}
-    const char *Name;
-    DataType V;
-    const char *HelpStr;
+      GenericOptionInfo(name, helpStr), V(v) {}
+    OptionValue<DataType> V;
   };
   SmallVector<OptionInfo, 8> Values;
 public:
@@ -462,6 +607,11 @@ public:
     return Values[N].HelpStr;
   }
 
+  // getOptionValue - Return the value of option name N.
+  virtual const GenericOptionValue &getOptionValue(unsigned N) const {
+    return Values[N].V;
+  }
+
   // parse - Return true on error.
   bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) {
     StringRef ArgVal;
@@ -473,7 +623,7 @@ public:
     for (unsigned i = 0, e = static_cast<unsigned>(Values.size());
          i != e; ++i)
       if (Values[i].Name == ArgVal) {
-        V = Values[i].V;
+        V = Values[i].V.getValue();
         return false;
       }
 
@@ -522,11 +672,19 @@ public:
   //
   void printOptionInfo(const Option &O, size_t GlobalWidth) const;
 
+  // printOptionNoValue - Print a placeholder for options that don't yet support
+  // printOptionDiff().
+  void printOptionNoValue(const Option &O, size_t GlobalWidth) const;
+
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "value"; }
 
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
+
+protected:
+  // A helper for basic_parser::printOptionDiff.
+  void printOptionName(const Option &O, size_t GlobalWidth) const;
 };
 
 // basic_parser - The real basic parser is just a template wrapper that provides
@@ -536,6 +694,7 @@ template<class DataType>
 class basic_parser : public basic_parser_impl {
 public:
   typedef DataType parser_data_type;
+  typedef OptionValue<DataType> OptVal;
 };
 
 //--------------------------------------------------
@@ -561,6 +720,9 @@ public:
   // getValueName - Do not print =<value> at all.
   virtual const char *getValueName() const { return 0; }
 
+  void printOptionDiff(const Option &O, bool V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -569,7 +731,6 @@ EXTERN_TEMPLATE_INSTANTIATION(class basic_parser<bool>);
 
 //--------------------------------------------------
 // parser<boolOrDefault>
-enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };
 template<>
 class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
 public:
@@ -583,6 +744,9 @@ public:
   // getValueName - Do not print =<value> at all.
   virtual const char *getValueName() const { return 0; }
 
+  void printOptionDiff(const Option &O, boolOrDefault V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -601,6 +765,9 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "int"; }
 
+  void printOptionDiff(const Option &O, int V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -620,6 +787,9 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "uint"; }
 
+  void printOptionDiff(const Option &O, unsigned V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -638,6 +808,9 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "number"; }
 
+  void printOptionDiff(const Option &O, double V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -656,6 +829,9 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "number"; }
 
+  void printOptionDiff(const Option &O, float V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -677,6 +853,9 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "string"; }
 
+  void printOptionDiff(const Option &O, StringRef V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
@@ -698,12 +877,63 @@ public:
   // getValueName - Overload in subclass to provide a better default value.
   virtual const char *getValueName() const { return "char"; }
 
+  void printOptionDiff(const Option &O, char V, OptVal Default,
+                       size_t GlobalWidth) const;
+
   // An out-of-line virtual method to provide a 'home' for this class.
   virtual void anchor();
 };
 
 EXTERN_TEMPLATE_INSTANTIATION(class basic_parser<char>);
 
+//--------------------------------------------------
+// PrintOptionDiff
+//
+// This collection of wrappers is the intermediary between class opt and class
+// parser to handle all the template nastiness.
+
+// This overloaded function is selected by the generic parser.
+template<class ParserClass, class DT>
+void printOptionDiff(const Option &O, const generic_parser_base &P, const DT &V,
+                     const OptionValue<DT> &Default, size_t GlobalWidth) {
+  OptionValue<DT> OV = V;
+  P.printOptionDiff(O, OV, Default, GlobalWidth);
+}
+
+// This is instantiated for basic parsers when the parsed value has a different
+// type than the option value. e.g. HelpPrinter.
+template<class ParserDT, class ValDT>
+struct OptionDiffPrinter {
+  void print(const Option &O, const parser<ParserDT> P, const ValDT &/*V*/,
+             const OptionValue<ValDT> &/*Default*/, size_t GlobalWidth) {
+    P.printOptionNoValue(O, GlobalWidth);
+  }
+};
+
+// This is instantiated for basic parsers when the parsed value has the same
+// type as the option value.
+template<class DT>
+struct OptionDiffPrinter<DT, DT> {
+  void print(const Option &O, const parser<DT> P, const DT &V,
+             const OptionValue<DT> &Default, size_t GlobalWidth) {
+    P.printOptionDiff(O, V, Default, GlobalWidth);
+  }
+};
+
+// This overloaded function is selected by the basic parser, which may parse a
+// different type than the option type.
+template<class ParserClass, class ValDT>
+void printOptionDiff(
+  const Option &O,
+  const basic_parser<typename ParserClass::parser_data_type> &P,
+  const ValDT &V, const OptionValue<ValDT> &Default,
+  size_t GlobalWidth) {
+
+  OptionDiffPrinter<typename ParserClass::parser_data_type, ValDT> printer;
+  printer.print(O, static_cast<const ParserClass&>(P), V, Default,
+                GlobalWidth);
+}
+
 //===----------------------------------------------------------------------===//
 // applicator class - This class is used because we must use partial
 // specialization to handle literal string arguments specially (const char* does
@@ -753,7 +983,6 @@ void apply(const Mod &M, Opt *O) {
   applicator<Mod>::opt(M, *O);
 }
 
-
 //===----------------------------------------------------------------------===//
 // opt_storage class
 
@@ -764,6 +993,7 @@ void apply(const Mod &M, Opt *O) {
 template<class DataType, bool ExternalStorage, bool isClass>
 class opt_storage {
   DataType *Location;   // Where to store the object...
+  OptionValue<DataType> Default;
 
   void check() const {
     assert(Location != 0 && "cl::location(...) not specified for a command "
@@ -777,21 +1007,25 @@ public:
     if (Location)
       return O.error("cl::location(x) specified more than once!");
     Location = &L;
+    Default = L;
     return false;
   }
 
   template<class T>
-  void setValue(const T &V) {
+  void setValue(const T &V, bool initial = false) {
     check();
     *Location = V;
+    if (initial)
+      Default = V;
   }
 
   DataType &getValue() { check(); return *Location; }
   const DataType &getValue() const { check(); return *Location; }
 
   operator DataType() const { return this->getValue(); }
-};
 
+  const OptionValue<DataType> &getDefault() const { return Default; }
+};
 
 // Define how to hold a class type object, such as a string.  Since we can
 // inherit from a class, we do so.  This makes us exactly compatible with the
@@ -800,11 +1034,19 @@ public:
 template<class DataType>
 class opt_storage<DataType,false,true> : public DataType {
 public:
+  OptionValue<DataType> Default;
+
   template<class T>
-  void setValue(const T &V) { DataType::operator=(V); }
+  void setValue(const T &V, bool initial = false) {
+    DataType::operator=(V);
+    if (initial)
+      Default = V;
+  }
 
   DataType &getValue() { return *this; }
   const DataType &getValue() const { return *this; }
+
+  const OptionValue<DataType> &getDefault() const { return Default; }
 };
 
 // Define a partial specialization to handle things we cannot inherit from.  In
@@ -815,16 +1057,23 @@ template<class DataType>
 class opt_storage<DataType, false, false> {
 public:
   DataType Value;
+  OptionValue<DataType> Default;
 
   // Make sure we initialize the value with the default constructor for the
   // type.
   opt_storage() : Value(DataType()) {}
 
   template<class T>
-  void setValue(const T &V) { Value = V; }
+  void setValue(const T &V, bool initial = false) {
+    Value = V;
+    if (initial)
+      Default = V;
+  }
   DataType &getValue() { return Value; }
   DataType getValue() const { return Value; }
 
+  const OptionValue<DataType> &getDefault() const { return Default; }
+
   operator DataType() const { return getValue(); }
 
   // If the datatype is a pointer, support -> on it.
@@ -866,13 +1115,20 @@ class opt : public Option,
     Parser.printOptionInfo(*this, GlobalWidth);
   }
 
+  virtual void printOptionValue(size_t GlobalWidth, bool Force) const {
+    if (Force || this->getDefault().compare(this->getValue())) {
+      cl::printOptionDiff<ParserClass>(
+        *this, Parser, this->getValue(), this->getDefault(), GlobalWidth);
+    }
+  }
+
   void done() {
     addArgument();
     Parser.initialize(*this);
   }
 public:
   // setInitialValue - Used by the cl::init modifier...
-  void setInitialValue(const DataType &V) { this->setValue(V); }
+  void setInitialValue(const DataType &V) { this->setValue(V, true); }
 
   ParserClass &getParser() { return Parser; }
 
@@ -1030,6 +1286,9 @@ class list : public Option, public list_storage<DataType, Storage> {
     Parser.printOptionInfo(*this, GlobalWidth);
   }
 
+  // Unimplemented: list options don't currently store their default value.
+  virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {}
+
   void done() {
     addArgument();
     Parser.initialize(*this);
@@ -1229,6 +1488,9 @@ class bits : public Option, public bits_storage<DataType, Storage> {
     Parser.printOptionInfo(*this, GlobalWidth);
   }
 
+  // Unimplemented: bits options don't currently store their default values.
+  virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {}
+
   void done() {
     addArgument();
     Parser.initialize(*this);
@@ -1320,6 +1582,9 @@ class alias : public Option {
   virtual size_t getOptionWidth() const;
   virtual void printOptionInfo(size_t GlobalWidth) const;
 
+  // Aliases do not need to print their values.
+  virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {}
+
   void done() {
     if (!hasArgStr())
       error("cl::alias must have argument name specified!");
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 67f0fd7..e092157 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -126,4 +126,12 @@
   decl
 #endif
 
+// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
+// to an expression which states that it is undefined behavior for the
+// compiler to reach this point.  Otherwise is not defined.
+#if defined(__clang__) || (__GNUC__ > 4) \
+ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
+#endif
+
 #endif
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index bd3765d..d0eaa3e 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -22,12 +22,10 @@
 
 namespace llvm {
 
-class LLVMContext;
-
 /// ConstantFolder - Create constants with minimum, target independent, folding.
 class ConstantFolder {
 public:
-  explicit ConstantFolder(LLVMContext &) {}
+  explicit ConstantFolder() {}
 
   //===--------------------------------------------------------------------===//
   // Binary Operators
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
index 2e9b5d4..db835e8 100644
--- a/include/llvm/Support/CrashRecoveryContext.h
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -15,6 +15,8 @@
 namespace llvm {
 class StringRef;
 
+class CrashRecoveryContextCleanup;
+  
 /// \brief Crash recovery helper object.
 ///
 /// This class implements support for running operations in a safe context so
@@ -42,10 +44,14 @@ class StringRef;
 /// Crash recovery contexts may not be nested.
 class CrashRecoveryContext {
   void *Impl;
+  CrashRecoveryContextCleanup *head;
 
 public:
-  CrashRecoveryContext() : Impl(0) {}
+  CrashRecoveryContext() : Impl(0), head(0) {}
   ~CrashRecoveryContext();
+  
+  void registerCleanup(CrashRecoveryContextCleanup *cleanup);
+  void unregisterCleanup(CrashRecoveryContextCleanup *cleanup);
 
   /// \brief Enable crash recovery.
   static void Enable();
@@ -57,6 +63,10 @@ public:
   /// thread which is in a protected context.
   static CrashRecoveryContext *GetCurrent();
 
+  /// \brief Return true if the current thread is recovering from a
+  /// crash.
+  static bool isRecoveringFromCrash();
+
   /// \brief Execute the provide callback function (with the given arguments) in
   /// a protected context.
   ///
@@ -87,6 +97,99 @@ public:
   const std::string &getBacktrace() const;
 };
 
+class CrashRecoveryContextCleanup {
+protected:
+  CrashRecoveryContext *context;
+  CrashRecoveryContextCleanup(CrashRecoveryContext *context)
+    : context(context), cleanupFired(false) {}
+public:
+  bool cleanupFired;
+  
+  virtual ~CrashRecoveryContextCleanup();
+  virtual void recoverResources() = 0;
+
+  CrashRecoveryContext *getContext() const {
+    return context;
+  }
+
+private:
+  friend class CrashRecoveryContext;
+  CrashRecoveryContextCleanup *prev, *next;
+};
+
+template<typename DERIVED, typename T>
+class CrashRecoveryContextCleanupBase : public CrashRecoveryContextCleanup {
+protected:
+  T *resource;
+  CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T* resource)
+    : CrashRecoveryContextCleanup(context), resource(resource) {}
+public:
+  static DERIVED *create(T *x) {
+    if (x) {
+      if (CrashRecoveryContext *context = CrashRecoveryContext::GetCurrent())
+        return new DERIVED(context, x);
+    }
+    return 0;
+  }
+};
+
+template <typename T>
+class CrashRecoveryContextDestructorCleanup : public
+  CrashRecoveryContextCleanupBase<CrashRecoveryContextDestructorCleanup<T>, T> {
+public:
+  CrashRecoveryContextDestructorCleanup(CrashRecoveryContext *context,
+                                        T *resource) 
+    : CrashRecoveryContextCleanupBase<
+        CrashRecoveryContextDestructorCleanup<T>, T>(context, resource) {}
+
+  virtual void recoverResources() {
+    this->resource->~T();
+  }
+};
+
+template <typename T>
+class CrashRecoveryContextDeleteCleanup : public
+  CrashRecoveryContextCleanupBase<CrashRecoveryContextDeleteCleanup<T>, T> {
+public:
+  CrashRecoveryContextDeleteCleanup(CrashRecoveryContext *context, T *resource)
+    : CrashRecoveryContextCleanupBase<
+        CrashRecoveryContextDeleteCleanup<T>, T>(context, resource) {}
+
+  virtual void recoverResources() {
+    delete this->resource;
+  }  
+};
+
+template <typename T>
+class CrashRecoveryContextReleaseRefCleanup : public
+  CrashRecoveryContextCleanupBase<CrashRecoveryContextReleaseRefCleanup<T>, T>
+{
+public:
+  CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, 
+                                        T *resource)
+    : CrashRecoveryContextCleanupBase<CrashRecoveryContextReleaseRefCleanup<T>,
+          T>(context, resource) {}
+
+  virtual void recoverResources() {
+    this->resource->Release();
+  }
+};
+
+template <typename T, typename Cleanup = CrashRecoveryContextDeleteCleanup<T> >
+class CrashRecoveryContextCleanupRegistrar {
+  CrashRecoveryContextCleanup *cleanup;
+public:
+  CrashRecoveryContextCleanupRegistrar(T *x)
+    : cleanup(Cleanup::create(x)) {
+    if (cleanup)
+      cleanup->getContext()->registerCleanup(cleanup);
+  }
+
+  ~CrashRecoveryContextCleanupRegistrar() {
+    if (cleanup && !cleanup->cleanupFired)
+        cleanup->getContext()->unregisterCleanup(cleanup);
+  }
+};
 }
 
 #endif
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 796c74a..3cb8164 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -89,8 +89,9 @@ public:
 
   /// If you want to override the dot attributes printed for a particular edge,
   /// override this method.
-  template<typename EdgeIter>
-  static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+  template<typename EdgeIter, typename GraphType>
+  static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+                                       const GraphType& Graph) {
     return "";
   }
 
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index ccc3446..98a05a4 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_SUPPORT_DEBUGLOC_H
 #define LLVM_SUPPORT_DEBUGLOC_H
 
+#include "llvm/ADT/DenseMapInfo.h"
+
 namespace llvm {
   class MDNode;
   class LLVMContext;
@@ -23,6 +25,24 @@ namespace llvm {
   /// and MachineInstr to compactly encode file/line/scope information for an
   /// operation.
   class DebugLoc {
+    friend struct DenseMapInfo<DebugLoc>;
+
+    /// getEmptyKey() - A private constructor that returns an unknown that is
+    /// not equal to the tombstone key or DebugLoc().
+    static DebugLoc getEmptyKey() {
+      DebugLoc DL;
+      DL.LineCol = 1;
+      return DL;
+    }
+
+    /// getTombstoneKey() - A private constructor that returns an unknown that
+    /// is not equal to the empty key or DebugLoc().
+    static DebugLoc getTombstoneKey() {
+      DebugLoc DL;
+      DL.LineCol = 2;
+      return DL;
+    }
+
     /// LineCol - This 32-bit value encodes the line and column number for the
     /// location, encoded as 24-bits for line and 8 bits for col.  A value of 0
     /// for either means unknown.
@@ -75,6 +95,14 @@ namespace llvm {
     }
     bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }
   };
+
+  template <>
+  struct DenseMapInfo<DebugLoc> {
+    static DebugLoc getEmptyKey();
+    static DebugLoc getTombstoneKey();
+    static unsigned getHashValue(const DebugLoc &Key);
+    static bool isEqual(const DebugLoc &LHS, const DebugLoc &RHS);
+  };
 } // end namespace llvm
 
 #endif /* LLVM_DEBUGLOC_H */
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 5d0b5a9..f6d680b 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -231,6 +231,10 @@ enum dwarf_constants {
   DW_AT_APPLE_major_runtime_vers = 0x3fe5,
   DW_AT_APPLE_runtime_class = 0x3fe6,
   DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
+  DW_AT_APPLE_property_name = 0x3fe8,
+  DW_AT_APPLE_property_getter = 0x3fe9,
+  DW_AT_APPLE_property_setter = 0x3fea,
+  DW_AT_APPLE_property_attribute = 0x3feb,
 
   // Attribute form encodings
   DW_FORM_addr = 0x01,
@@ -407,6 +411,7 @@ enum dwarf_constants {
   DW_OP_call_ref = 0x9a,
   DW_OP_form_tls_address = 0x9b,
   DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
   DW_OP_lo_user = 0xe0,
   DW_OP_hi_user = 0xff,
 
@@ -584,7 +589,15 @@ enum dwarf_constants {
   DW_EH_PE_datarel = 0x30,
   DW_EH_PE_funcrel = 0x40,
   DW_EH_PE_aligned = 0x50,
-  DW_EH_PE_indirect = 0x80
+  DW_EH_PE_indirect = 0x80,
+
+  // Apple Objective-C Property Attributes
+  DW_APPLE_PROPERTY_readonly = 0x01,
+  DW_APPLE_PROPERTY_readwrite = 0x02,
+  DW_APPLE_PROPERTY_assign = 0x04,
+  DW_APPLE_PROPERTY_retain = 0x08,
+  DW_APPLE_PROPERTY_copy = 0x10,
+  DW_APPLE_PROPERTY_nonatomic = 0x20
 };
 
 /// TagString - Return the string for the specified tag.
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 5eca438..95b0109 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -86,16 +86,19 @@ namespace llvm {
                                                          unsigned line=0);
 }
 
-/// Prints the message and location info to stderr in !NDEBUG builds.
-/// This is intended to be used for "impossible" situations that imply
-/// a bug in the compiler.
+/// Marks that the current location is not supposed to be reachable.
+/// In !NDEBUG builds, prints the message and location info to stderr.
+/// In NDEBUG builds, becomes an optimizer hint that the current location
+/// is not supposed to be reachable.  On compilers that don't support
+/// such hints, prints a reduced message instead.
 ///
-/// In NDEBUG mode it only prints "UNREACHABLE executed".
-/// Use this instead of assert(0), so that the compiler knows this path
-/// is not reachable even for NDEBUG builds.
+/// Use this instead of assert(0).  It conveys intent more clearly and
+/// allows compilers to omit some unnecessary code.
 #ifndef NDEBUG
 #define llvm_unreachable(msg) \
   ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
+#elif defined(LLVM_BUILTIN_UNREACHABLE)
+#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE
 #else
 #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #endif
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index 4001bf0..4f013f8 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -595,7 +595,7 @@ public:
   void replace_filename(const Twine &filename, file_status st = file_status(),
                               file_status symlink_st = file_status());
 
-  StringRef path() const { return Path; }
+  const std::string &path() const { return Path; }
   error_code status(file_status &result) const;
   error_code symlink_status(file_status &result) const;
 
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index 748ce7c..5456eb7 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -15,13 +15,14 @@
 #ifndef LLVM_SUPPORT_FILEUTILITIES_H
 #define LLVM_SUPPORT_FILEUTILITIES_H
 
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
 namespace llvm {
 
   /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if
   /// the files match, 1 if they are different, and 2 if there is a file error.
-  /// This function allows you to specify an absolete and relative FP error that
+  /// This function allows you to specify an absolute and relative FP error that
   /// is allowed to exist.  If you specify a string to fill in for the error
   /// option, it will set the string to an error message if an error occurs, or
   /// if the files are different.
@@ -37,29 +38,36 @@ namespace llvm {
   /// specified (if deleteIt is true).
   ///
   class FileRemover {
-    sys::Path Filename;
+    SmallString<128> Filename;
     bool DeleteIt;
   public:
     FileRemover() : DeleteIt(false) {}
 
-    explicit FileRemover(const sys::Path &filename, bool deleteIt = true)
-      : Filename(filename), DeleteIt(deleteIt) {}
+    explicit FileRemover(const Twine& filename, bool deleteIt = true)
+      : DeleteIt(deleteIt) {
+      filename.toVector(Filename);
+    }
 
     ~FileRemover() {
       if (DeleteIt) {
         // Ignore problems deleting the file.
-        Filename.eraseFromDisk();
+        bool existed;
+        sys::fs::remove(Filename.str(), existed);
       }
     }
 
     /// setFile - Give ownership of the file to the FileRemover so it will
     /// be removed when the object is destroyed.  If the FileRemover already
     /// had ownership of a file, remove it first.
-    void setFile(const sys::Path &filename, bool deleteIt = true) {
-      if (DeleteIt)
-        Filename.eraseFromDisk();
+    void setFile(const Twine& filename, bool deleteIt = true) {
+      if (DeleteIt) {
+        // Ignore problems deleting the file.
+        bool existed;
+        sys::fs::remove(Filename.str(), existed);
+      }
 
-      Filename = filename;
+      Filename.clear();
+      filename.toVector(Filename);
       DeleteIt = deleteIt;
     }
 
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 7573ef0..eab0c9d 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -70,7 +70,7 @@ class GraphWriter {
     for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) {
       std::string label = DTraits.getEdgeSourceLabel(Node, EI);
 
-      if (label == "")
+      if (label.empty())
         continue;
 
       hasEdgeSourceLabels = true;
@@ -78,7 +78,7 @@ class GraphWriter {
       if (i)
         O << "|";
 
-      O << "<s" << i << ">" << DTraits.getEdgeSourceLabel(Node, EI);
+      O << "<s" << i << ">" << DOT::EscapeString(label);
     }
 
     if (EI != EE && hasEdgeSourceLabels)
@@ -235,12 +235,12 @@ public:
         DestPort = static_cast<int>(Offset);
       }
 
-      if (DTraits.getEdgeSourceLabel(Node, EI) == "")
+      if (DTraits.getEdgeSourceLabel(Node, EI).empty())
         edgeidx = -1;
 
       emitEdge(static_cast<const void*>(Node), edgeidx,
                static_cast<const void*>(TargetNode), DestPort,
-               DTraits.getEdgeAttributes(Node, EI));
+               DTraits.getEdgeAttributes(Node, EI, G));
     }
   }
 
@@ -272,7 +272,7 @@ public:
                 const void *DestNodeID, int DestNodePort,
                 const std::string &Attrs) {
     if (SrcNodePort  > 64) return;             // Eminating from truncated part?
-    if (DestNodePort > 64) DestNodePort = 64;  // Targetting the truncated part?
+    if (DestNodePort > 64) DestNodePort = 64;  // Targeting the truncated part?
 
     O << "\tNode" << SrcNodeID;
     if (SrcNodePort >= 0)
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 2394a59..3878e79 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -17,6 +17,8 @@
 
 #include "llvm/Instructions.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ConstantFolder.h"
 
@@ -152,9 +154,10 @@ public:
 
   /// CreateGlobalString - Make a new global variable with an initializer that
   /// has array of i8 type filled in with the nul terminated string value
-  /// specified.  If Name is specified, it is the name of the global variable
-  /// created.
-  Value *CreateGlobalString(const char *Str = "", const Twine &Name = "");
+  /// specified.  The new global variable will be marked mergable with any
+  /// others of the same contents.  If Name is specified, it is the name of the
+  /// global variable created.
+  Value *CreateGlobalString(StringRef Str, const Twine &Name = "");
 
   /// getInt1 - Get a constant value representing either true or false.
   ConstantInt *getInt1(bool V) {
@@ -190,6 +193,10 @@ public:
   ConstantInt *getInt64(uint64_t C) {
     return ConstantInt::get(getInt64Ty(), C);
   }
+  
+  ConstantInt *getInt(const APInt &AI) {
+    return ConstantInt::get(Context, AI);
+  }
 
   //===--------------------------------------------------------------------===//
   // Type creation methods
@@ -301,7 +308,7 @@ public:
     : IRBuilderBase(C), Inserter(I), Folder(F) {
   }
 
-  explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder(C) {
+  explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder() {
   }
 
   explicit IRBuilder(BasicBlock *TheBB, const T &F)
@@ -310,12 +317,12 @@ public:
   }
 
   explicit IRBuilder(BasicBlock *TheBB)
-    : IRBuilderBase(TheBB->getContext()), Folder(Context) {
+    : IRBuilderBase(TheBB->getContext()), Folder() {
     SetInsertPoint(TheBB);
   }
 
   explicit IRBuilder(Instruction *IP)
-    : IRBuilderBase(IP->getContext()), Folder(Context) {
+    : IRBuilderBase(IP->getContext()), Folder() {
     SetInsertPoint(IP);
   }
   
@@ -325,7 +332,7 @@ public:
   }
 
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP)
-    : IRBuilderBase(TheBB->getContext()), Folder(Context) {
+    : IRBuilderBase(TheBB->getContext()), Folder() {
     SetInsertPoint(TheBB, IP);
   }
 
@@ -861,7 +868,7 @@ public:
 
   /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer
   /// with "i8*" type instead of a pointer to array of i8.
-  Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") {
+  Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") {
     Value *gv = CreateGlobalString(Str, Name);
     Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
     Value *Args[] = { zero, zero };
@@ -1070,8 +1077,9 @@ public:
   // Instruction creation methods: Other Instructions
   //===--------------------------------------------------------------------===//
 
-  PHINode *CreatePHI(const Type *Ty, const Twine &Name = "") {
-    return Insert(PHINode::Create(Ty), Name);
+  PHINode *CreatePHI(const Type *Ty, unsigned NumReservedValues,
+                     const Twine &Name = "") {
+    return Insert(PHINode::Create(Ty, NumReservedValues), Name);
   }
 
   CallInst *CreateCall(Value *Callee, const Twine &Name = "") {
@@ -1101,6 +1109,11 @@ public:
     return Insert(CallInst::Create(Callee, Args, Args+5), Name);
   }
 
+  CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Arg,
+                       const Twine &Name = "") {
+    return Insert(CallInst::Create(Callee, Arg.begin(), Arg.end(), Name));
+  }
+
   template<typename RandomAccessIterator>
   CallInst *CreateCall(Value *Callee, RandomAccessIterator ArgBegin,
                        RandomAccessIterator ArgEnd, const Twine &Name = "") {
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index 9c3f85b..37890e7 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -75,12 +75,12 @@ namespace sys {
     /// setExecutable - Before the JIT can run a block of code, it has to be
     /// given read and executable privilege. Return true if it is already r-x
     /// or the system is able to change its previlege.
-    static bool setExecutable (MemoryBlock &M, std::string *ErrMsg = 0);
+    static bool setExecutable(MemoryBlock &M, std::string *ErrMsg = 0);
 
     /// setWritable - When adding to a block of code, the JIT may need
     /// to mark a block of code as RW since the protections are on page
     /// boundaries, and the JIT internal allocations are not page aligned.
-    static bool setWritable (MemoryBlock &M, std::string *ErrMsg = 0);
+    static bool setWritable(MemoryBlock &M, std::string *ErrMsg = 0);
 
     /// setRangeExecutable - Mark the page containing a range of addresses
     /// as executable.
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index b6243b7..d912e86 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -40,7 +40,8 @@ class MemoryBuffer {
   MemoryBuffer &operator=(const MemoryBuffer &); // DO NOT IMPLEMENT
 protected:
   MemoryBuffer() {}
-  void init(const char *BufStart, const char *BufEnd);
+  void init(const char *BufStart, const char *BufEnd,
+            bool RequiresNullTerminator);
 public:
   virtual ~MemoryBuffer();
 
@@ -63,21 +64,27 @@ public:
   /// specified, this means that the client knows that the file exists and that
   /// it has the specified size.
   static error_code getFile(StringRef Filename, OwningPtr<MemoryBuffer> &result,
-                            int64_t FileSize = -1);
+                            int64_t FileSize = -1,
+                            bool RequiresNullTerminator = true);
   static error_code getFile(const char *Filename,
                             OwningPtr<MemoryBuffer> &result,
-                            int64_t FileSize = -1);
+                            int64_t FileSize = -1,
+                            bool RequiresNullTerminator = true);
 
   /// getOpenFile - Given an already-open file descriptor, read the file and
   /// return a MemoryBuffer.
   static error_code getOpenFile(int FD, const char *Filename,
                                 OwningPtr<MemoryBuffer> &result,
-                                int64_t FileSize = -1);
+                                size_t FileSize = -1,
+                                size_t MapSize = -1,
+                                off_t Offset = 0,
+                                bool RequiresNullTerminator = true);
 
   /// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
   /// that InputData must be null terminated.
   static MemoryBuffer *getMemBuffer(StringRef InputData,
-                                    StringRef BufferName = "");
+                                    StringRef BufferName = "",
+                                    bool RequiresNullTerminator = true);
 
   /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
   /// copying the contents and taking ownership of it.  InputData does not
@@ -112,6 +119,21 @@ public:
   static error_code getFileOrSTDIN(const char *Filename,
                                    OwningPtr<MemoryBuffer> &result,
                                    int64_t FileSize = -1);
+  
+  
+  //===--------------------------------------------------------------------===//
+  // Provided for performance analysis.
+  //===--------------------------------------------------------------------===//
+
+  /// The kind of memory backing used to support the MemoryBuffer.
+  enum BufferKind {
+    MemoryBuffer_Malloc,
+    MemoryBuffer_MMap
+  };
+
+  /// Return information on the memory mechanism used to support the
+  /// MemoryBuffer.
+  virtual BufferKind getBufferKind() const = 0;  
 };
 
 } // end namespace llvm
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 92a9fd6..5ead26e 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -27,12 +27,10 @@
 
 namespace llvm {
 
-class LLVMContext;
-
 /// NoFolder - Create "constants" (actually, instructions) with no folding.
 class NoFolder {
 public:
-  explicit NoFolder(LLVMContext &) {}
+  explicit NoFolder() {}
 
   //===--------------------------------------------------------------------===//
   // Binary Operators
diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h
index d7753a3..024bb39 100644
--- a/include/llvm/Support/PathV1.h
+++ b/include/llvm/Support/PathV1.h
@@ -608,14 +608,15 @@ namespace sys {
       ///
       /// This API is not intended for general use, clients should use
       /// MemoryBuffer::getFile instead.
-      static const char *MapInFilePages(int FD, uint64_t FileSize);
+      static const char *MapInFilePages(int FD, size_t FileSize,
+                                        off_t Offset);
 
       /// UnMapFilePages - Free pages mapped into the current process by
       /// MapInFilePages.
       ///
       /// This API is not intended for general use, clients should use
       /// MemoryBuffer::getFile instead.
-      static void UnMapFilePages(const char *Base, uint64_t FileSize);
+      static void UnMapFilePages(const char *Base, size_t FileSize);
 
     /// @}
     /// @name Data
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index 948ae51..172480e 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -40,6 +40,23 @@ bool match(Val *V, const Pattern &P) {
   return const_cast<Pattern&>(P).match(V);
 }
 
+  
+template<typename SubPattern_t>
+struct OneUse_match {
+  SubPattern_t SubPattern;
+  
+  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+  
+  template<typename OpTy>
+  bool match(OpTy *V) {
+    return V->hasOneUse() && SubPattern.match(V);
+  }
+};
+
+template<typename T>
+inline OneUse_match<T> m_OneUse(const T &SubPattern) { return SubPattern; }
+  
+  
 template<typename Class>
 struct class_match {
   template<typename ITy>
@@ -227,7 +244,25 @@ struct specificval_ty {
 /// m_Specific - Match if we have a specific specified value.
 inline specificval_ty m_Specific(const Value *V) { return V; }
 
+struct bind_const_intval_ty {
+  uint64_t &VR;
+  bind_const_intval_ty(uint64_t &V) : VR(V) {}
+  
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (ConstantInt *CV = dyn_cast<ConstantInt>(V))
+      if (CV->getBitWidth() <= 64) {
+        VR = CV->getZExtValue();
+        return true;
+      }
+    return false;
+  }
+};
 
+/// m_ConstantInt - Match a ConstantInt and bind to its value.  This does not
+/// match ConstantInts wider than 64-bits.
+inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
+  
 //===----------------------------------------------------------------------===//
 // Matchers for specific binary operators.
 //
diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h
index 6dbce39..9b3ecda 100644
--- a/include/llvm/Support/PrettyStackTrace.h
+++ b/include/llvm/Support/PrettyStackTrace.h
@@ -20,7 +20,7 @@ namespace llvm {
   class raw_ostream;
 
   /// DisablePrettyStackTrace - Set this to true to disable this module. This
-  /// might be neccessary if the host application installs its own signal
+  /// might be necessary if the host application installs its own signal
   /// handlers which conflict with the ones installed by this module.
   /// Defaults to false.
   extern bool DisablePrettyStackTrace;
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index 78a495e..96b3566 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -102,7 +102,7 @@ namespace sys {
       );
 
     /// This function terminates the program.
-    /// @returns true if an error occured.
+    /// @returns true if an error occurred.
     /// @see Execute
     /// @brief Terminates the program.
     bool Kill
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index b46a668..7648e77 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -53,7 +53,7 @@ namespace llvm {
 
     /// matches - Match the regex against a given \arg String.
     ///
-    /// \param Matches - If given, on a succesful match this will be filled in
+    /// \param Matches - If given, on a successful match this will be filled in
     /// with references to the matched group expressions (inside \arg String),
     /// the first group is always the entire pattern.
     ///
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index 9a84df6..634f4cf 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines some helpful functions for dealing with the possibility of
-// unix signals occuring while your program is running.
+// unix signals occurring while your program is running.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index a41a633..2a712e4 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -156,10 +156,9 @@ public:
   // Null diagnostic.
   SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {}
   // Diagnostic with no location (e.g. file not found, command line arg error).
-  SMDiagnostic(const std::string &filename, const std::string &Msg,
-               bool showline = true)
+  SMDiagnostic(const std::string &filename, const std::string &Msg)
     : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1),
-      Message(Msg), ShowLine(showline) {}
+      Message(Msg), ShowLine(false) {}
   
   // Diagnostic with a location.
   SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
@@ -171,7 +170,7 @@ public:
 
   const SourceMgr *getSourceMgr() const { return SM; }
   SMLoc getLoc() const { return Loc; }
-  const std::string &getFilename() { return Filename; }
+  const std::string &getFilename() const { return Filename; }
   int getLineNo() const { return LineNo; }
   int getColumnNo() const { return ColumnNo; }
   const std::string &getMessage() const { return Message; }
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index d774faf..8dfd6f9 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -72,6 +72,7 @@ namespace llvm {
                                                 Pass *InliningPass) {
     createStandardAliasAnalysisPasses(PM);
 
+    // If all optimizations are disabled, just run the always-inline pass.
     if (OptimizationLevel == 0) {
       if (InliningPass)
         PM->add(InliningPass);
@@ -83,9 +84,10 @@ namespace llvm {
       
       PM->add(createIPSCCPPass());              // IP SCCP
       PM->add(createDeadArgEliminationPass());  // Dead argument elimination
+      
+      PM->add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+      PM->add(createCFGSimplificationPass());   // Clean up after IPCP & DAE
     }
-    PM->add(createInstructionCombiningPass());  // Clean up after IPCP & DAE
-    PM->add(createCFGSimplificationPass());     // Clean up after IPCP & DAE
     
     // Start of CallGraph SCC passes.
     if (UnitAtATime && HaveExceptions)
@@ -120,7 +122,6 @@ namespace llvm {
     PM->add(createLoopDeletionPass());          // Delete dead loops
     if (UnrollLoops)
       PM->add(createLoopUnrollPass());          // Unroll small loops
-    PM->add(createInstructionCombiningPass());  // Clean up after the unroller
     if (OptimizationLevel > 1)
       PM->add(createGVNPass());                 // Remove redundancies
     PM->add(createMemCpyOptPass());             // Remove memcpy / form memset
@@ -134,6 +135,7 @@ namespace llvm {
     PM->add(createDeadStoreEliminationPass());  // Delete dead stores
     PM->add(createAggressiveDCEPass());         // Delete dead instructions
     PM->add(createCFGSimplificationPass());     // Merge & remove BBs
+    PM->add(createInstructionCombiningPass());  // Clean up after everything.
 
     if (UnitAtATime) {
       PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h
index e122711..94f132a 100644
--- a/include/llvm/Support/TimeValue.h
+++ b/include/llvm/Support/TimeValue.h
@@ -35,13 +35,13 @@ namespace sys {
   public:
 
     /// A constant TimeValue representing the smallest time
-    /// value permissable by the class. MinTime is some point
+    /// value permissible by the class. MinTime is some point
     /// in the distant past, about 300 billion years BCE.
     /// @brief The smallest possible time value.
     static const TimeValue MinTime;
 
     /// A constant TimeValue representing the largest time
-    /// value permissable by the class. MaxTime is some point
+    /// value permissible by the class. MaxTime is some point
     /// in the distant future, about 300 billion years AD.
     /// @brief The largest possible time value.
     static const TimeValue MaxTime;
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
index e5306ec..47759b9 100644
--- a/include/llvm/Support/system_error.h
+++ b/include/llvm/Support/system_error.h
@@ -1,4 +1,4 @@
-//===---------------------------- system_error ----------------------------===//
+//===---------------------------- system_error ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index 6c21ae9..4213d9b 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -35,8 +35,8 @@ namespace llvm {
 struct SubtargetFeatureKV {
   const char *Key;                      // K-V key string
   const char *Desc;                     // Help descriptor
-  uint32_t Value;                       // K-V integer value
-  uint32_t Implies;                     // K-V bit mask
+  uint64_t Value;                       // K-V integer value
+  uint64_t Implies;                     // K-V bit mask
   
   // Compare routine for std binary search
   bool operator<(const SubtargetFeatureKV &S) const {
@@ -94,7 +94,7 @@ public:
   void AddFeature(const std::string &String, bool IsEnabled = true);
            
   /// Get feature bits.
-  uint32_t getBits(const SubtargetFeatureKV *CPUTable,
+  uint64_t getBits(const SubtargetFeatureKV *CPUTable,
                          size_t CPUTableSize,
                    const SubtargetFeatureKV *FeatureTable,
                          size_t FeatureTableSize);
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 0f7e6aa..68f0515 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -32,17 +32,6 @@ class Register<string n> {
   string Namespace = "";
   string AsmName = n;
 
-  // SpillSize - If this value is set to a non-zero value, it is the size in
-  // bits of the spill slot required to hold this register.  If this value is
-  // set to zero, the information is inferred from any register classes the
-  // register belongs to.
-  int SpillSize = 0;
-
-  // SpillAlignment - This value is used to specify the alignment required for
-  // spilling the register.  Like SpillSize, this should only be explicitly
-  // specified if the register is not in a register class.
-  int SpillAlignment = 0;
-
   // Aliases - A list of registers that this register overlaps with.  A read or
   // modification of this register can potentially read or modify the aliased
   // registers.
@@ -78,6 +67,13 @@ class Register<string n> {
   // -1 indicates that the gcc number is undefined and -2 that register number
   // is invalid for this mode/flavour.
   list<int> DwarfNumbers = [];
+
+  // CostPerUse - Additional cost of instructions using this register compared
+  // to other registers in its class. The register allocator will try to
+  // minimize the number of instructions using a register with a CostPerUse.
+  // This is used by the x86-64 and ARM Thumb targets where some registers 
+  // require larger instruction encodings.
+  int CostPerUse = 0;
 }
 
 // RegisterWithSubRegs - This can be used to define instances of Register which
@@ -200,6 +196,7 @@ class Instruction {
   bit isIndirectBranch = 0; // Is this instruction an indirect branch?
   bit isCompare    = 0;     // Is this instruction a comparison instruction?
   bit isMoveImm    = 0;     // Is this instruction a move immediate instruction?
+  bit isBitcast    = 0;     // Is this instruction a bitcast instruction?
   bit isBarrier    = 0;     // Can control flow fall through this instruction?
   bit isCall       = 0;     // Is this instruction a call instruction?
   bit canFoldAsLoad = 0;    // Can this be folded as a simple memory operand?
@@ -590,9 +587,10 @@ class MnemonicAlias<string From, string To> {
 /// InstAlias - This defines an alternate assembly syntax that is allowed to
 /// match an instruction that has a different (more canonical) assembly
 /// representation.
-class InstAlias<string Asm, dag Result> {
+class InstAlias<string Asm, dag Result, bit Emit = 0b1> {
   string AsmString = Asm;      // The .s format to match the instruction with.
   dag ResultInst = Result;     // The MCInst to generate.
+  bit EmitAlias = Emit;        // Emit the alias instead of what's aliased.
 
   // Predicates - Predicates that must be true for this to match.
   list<Predicate> Predicates = [];
diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h
index 7527298..2111f6b 100644
--- a/include/llvm/Target/TargetAsmBackend.h
+++ b/include/llvm/Target/TargetAsmBackend.h
@@ -16,6 +16,7 @@
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
+class MCELFObjectTargetWriter;
 class MCFixup;
 class MCInst;
 class MCObjectWriter;
@@ -40,6 +41,13 @@ public:
   /// assembler backend to emit the final object file.
   virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
 
+  /// createELFObjectTargetWriter - Create a new ELFObjectTargetWriter to enable
+  /// non-standard ELFObjectWriters.
+  virtual  MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+    assert(0 && "createELFObjectTargetWriter is not supported by asm backend");
+    return 0;
+  }
+
   /// hasReliableSymbolDifference - Check whether this target implements
   /// accurate relocations for differences between symbols. If not, differences
   /// between symbols will always be relocatable expressions and any references
diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h
index 98aab14..0271b67 100644
--- a/include/llvm/Target/TargetAsmInfo.h
+++ b/include/llvm/Target/TargetAsmInfo.h
@@ -58,6 +58,14 @@ public:
     return TLOF->getEHFrameSection();
   }
 
+  unsigned getFDEEncoding(bool CFI) const {
+    return TLOF->getFDEEncoding(CFI);
+  }
+
+  bool isFunctionEHFrameSymbolPrivate() const {
+    return TLOF->isFunctionEHFrameSymbolPrivate();
+  }
+
   unsigned getDwarfRARegNum(bool isEH) const {
     return TRI->getDwarfRegNum(TRI->getRARegister(), isEH);
   }
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index 25065d3..32e3e2b 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -160,7 +160,18 @@ public:
   bool isIllegalInteger(unsigned Width) const {
     return !isLegalInteger(Width);
   }
-  
+
+  /// fitsInLegalInteger - This function returns true if the specified type fits
+  /// in a native integer type supported by the CPU.  For example, if the CPU
+  /// only supports i32 as a native integer type, then i27 fits in a legal
+  // integer type but i45 does not.
+  bool fitsInLegalInteger(unsigned Width) const {
+    for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+      if (Width <= LegalIntWidths[i])
+        return true;
+    return false;
+  }
+
   /// Target pointer alignment
   unsigned getPointerABIAlignment() const { return PointerABIAlign; }
   /// Return target's alignment for stack-based pointers
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index 8823d5a..6e20e8a 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -105,6 +105,7 @@ namespace TID {
     IndirectBranch,
     Compare,
     MoveImm,
+    Bitcast,
     DelaySlot,
     FoldableAsLoad,
     MayLoad,
@@ -358,6 +359,12 @@ public:
   bool isMoveImmediate() const {
     return Flags & (1 << TID::MoveImm);
   }
+
+  /// isBitcast - Return true if this instruction is a bitcast instruction.
+  ///
+  bool isBitcast() const {
+    return Flags & (1 << TID::Bitcast);
+  }
   
   /// isNotDuplicable - Return true if this instruction cannot be safely
   /// duplicated.  For example, if the instruction has a unique labels attached
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index fc7b51e..418f3fe 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -477,7 +477,7 @@ public:
   }
 
   /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-  /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
   /// be scheduled togther. On some targets if two loads are loading from
   /// addresses in the same cache line, it's better if they are scheduled
   /// together. This function takes two integers that represent the load offsets
@@ -641,6 +641,10 @@ public:
   virtual int getInstrLatency(const InstrItineraryData *ItinData,
                               SDNode *Node) const;
 
+  /// isHighLatencyDef - Return true if this opcode has high latency to its
+  /// result.
+  virtual bool isHighLatencyDef(int opc) const { return false; }
+
   /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
   /// and an use in the current loop, return true if the target considered
   /// it 'high'. This is used by optimization passes such as machine LICM to
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index a95b70f..198d585 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -155,9 +155,13 @@ public:
   /// in the itinerary.
   ///
   unsigned getStageLatency(unsigned ItinClassIndx) const {
-    // If the target doesn't provide itinerary information, use a
-    // simple non-zero default value for all instructions.
-    if (isEmpty())
+    // If the target doesn't provide itinerary information, use a simple
+    // non-zero default value for all instructions.  Some target's provide a
+    // dummy (Generic) itinerary which should be handled as if it's itinerary is
+    // empty. We identify this by looking for a reference to stage zero (invalid
+    // stage). This is different from beginStage == endState != 0, which could
+    // be used for zero-latency pseudo ops.
+    if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
       return 1;
 
     // Calculate the maximum completion time for any stage.
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index bdd214b..0914b5d 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -23,9 +23,21 @@ namespace llvm {
       // void *memcpy(void *s1, const void *s2, size_t n);
       memcpy,
       
+      // void *memmove(void *s1, const void *s2, size_t n);
+      memmove,
+      
       /// void memset_pattern16(void *b, const void *pattern16, size_t len);
       memset_pattern16,
       
+      /// int iprintf(const char *format, ...);
+      iprintf,
+      
+      /// int siprintf(char *str, const char *format, ...);
+      siprintf,
+      
+      /// int fiprintf(FILE *stream, const char *format, ...);
+      fiprintf,
+      
       NumLibFuncs
     };
   }
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index ba7574d..17d761c 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -39,6 +39,7 @@ namespace llvm {
   class AllocaInst;
   class APFloat;
   class CallInst;
+  class CCState;
   class Function;
   class FastISel;
   class FunctionLoweringInfo;
@@ -189,14 +190,6 @@ public:
     return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
   }
 
-  /// getRegPressureLimit - Return the register pressure "high water mark" for
-  /// the specific register class. The scheduler is in high register pressure
-  /// mode (for the specific register class) if it goes over the limit.
-  virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
-                                       MachineFunction &MF) const {
-    return 0;
-  }
-
   /// isTypeLegal - Return true if the target has native support for the
   /// specified value type.  This means that it has a register that directly
   /// holds it without promotions or expansions.
@@ -934,6 +927,7 @@ public:
     bool isCalledByLegalizer() const { return CalledByLegalizer; }
 
     void AddToWorklist(SDNode *N);
+    void RemoveFromWorklist(SDNode *N);
     SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
                       bool AddTo = true);
     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
@@ -1048,7 +1042,7 @@ protected:
   }
 
   /// JumpIsExpensive - Tells the code generator not to expand sequence of
-  /// operations into a seperate sequences that increases the amount of
+  /// operations into a separate sequences that increases the amount of
   /// flow control.
   void setJumpIsExpensive(bool isExpensive = true) {
     JumpIsExpensive = isExpensive;
@@ -1258,6 +1252,9 @@ public:
     return SDValue();    // this is here to silence compiler errors
   }
 
+  /// HandleByVal - Target-specific cleanup for formal ByVal parameters.
+  virtual void HandleByVal(CCState *, unsigned &) const {}
+
   /// CanLowerReturn - This hook should be implemented to check whether the
   /// return values described by the Outs array can fit into the return
   /// registers.  If false is returned, an sret-demotion is performed.
@@ -1291,6 +1288,26 @@ public:
     return false;
   }
 
+  /// mayBeEmittedAsTailCall - Return true if the target may be able emit the
+  /// call instruction as a tail call. This is used by optimization passes to
+  /// determine if it's profitable to duplicate return instructions to enable
+  /// tailcall optimization.
+  virtual bool mayBeEmittedAsTailCall(CallInst *CI) const {
+    return false;
+  }
+
+  /// getTypeForExtArgOrReturn - Return the type that should be used to zero or
+  /// sign extend a zeroext/signext integer argument or return value.
+  /// FIXME: Most C calling convention requires the return type to be promoted,
+  /// but this is not true all the time, e.g. i1 on x86-64. It is also not
+  /// necessary for non-C calling conventions. The frontend should handle this
+  /// and include all of the necessary information.
+  virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+                                       ISD::NodeType ExtendKind) const {
+    EVT MinVT = getRegisterType(Context, MVT::i32);
+    return VT.bitsLT(MinVT) ? MinVT : VT;
+  }
+
   /// LowerOperationWrapper - This callback is invoked by the type legalizer
   /// to legalize nodes with an illegal operand type but legal result types.
   /// It replaces the LowerOperation callback in the type Legalizer.
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 34bf271..7402ed6 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -140,6 +140,9 @@ public:
   const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
   const MCSection *getLSDASection() const { return LSDASection; }
   virtual const MCSection *getEHFrameSection() const = 0;
+  virtual void emitPersonalityValue(MCStreamer &Streamer,
+                                    const TargetMachine &TM,
+                                    const MCSymbol *Sym) const;
   const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
   const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
   const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
@@ -218,15 +221,19 @@ public:
                                  MachineModuleInfo *MMI, unsigned Encoding,
                                  MCStreamer &Streamer) const;
 
+  // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
+  virtual MCSymbol *
+  getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                          MachineModuleInfo *MMI) const;
+
   /// 
   const MCExpr *
-  getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
-                           MachineModuleInfo *MMI, unsigned Encoding,
+  getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
                            MCStreamer &Streamer) const;
   
   virtual unsigned getPersonalityEncoding() const;
   virtual unsigned getLSDAEncoding() const;
-  virtual unsigned getFDEEncoding() const;
+  virtual unsigned getFDEEncoding(bool CFI) const;
   virtual unsigned getTTypeEncoding() const;
 
 protected:
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 030bf5b..78f770c 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -38,6 +38,7 @@ class PassManager;
 class Pass;
 class TargetELFWriterInfo;
 class formatted_raw_ostream;
+class raw_ostream;
 
 // Relocation model types.
 namespace Reloc {
@@ -105,7 +106,9 @@ protected: // Can only create subclasses.
 
   unsigned MCRelaxAll : 1;
   unsigned MCNoExecStack : 1;
+  unsigned MCSaveTempLabels : 1;
   unsigned MCUseLoc : 1;
+  unsigned MCUseCFI : 1;
 
 public:
   virtual ~TargetMachine();
@@ -171,6 +174,14 @@ public:
   /// relaxed.
   void setMCRelaxAll(bool Value) { MCRelaxAll = Value; }
 
+  /// hasMCSaveTempLabels - Check whether temporary labels will be preserved
+  /// (i.e., not treated as temporary).
+  bool hasMCSaveTempLabels() const { return MCSaveTempLabels; }
+
+  /// setMCSaveTempLabels - Set whether temporary labels will be preserved
+  /// (i.e., not treated as temporary).
+  void setMCSaveTempLabels(bool Value) { MCSaveTempLabels = Value; }
+
   /// hasMCNoExecStack - Check whether an executable stack is not needed.
   bool hasMCNoExecStack() const { return MCNoExecStack; }
 
@@ -183,6 +194,12 @@ public:
   /// setMCUseLoc - Set whether all we should use dwarf's .loc directive.
   void setMCUseLoc(bool Value) { MCUseLoc = Value; }
 
+  /// hasMCUseCFI - Check whether we should use dwarf's .cfi_* directives.
+  bool hasMCUseCFI() const { return MCUseCFI; }
+
+  /// setMCUseCFI - Set whether all we should use dwarf's .cfi_* directives.
+  void setMCUseCFI(bool Value) { MCUseCFI = Value; }
+
   /// getRelocationModel - Returns the code generation relocation model. The
   /// choices are static, PIC, and dynamic-no-pic, and target default.
   static Reloc::Model getRelocationModel();
@@ -267,6 +284,7 @@ public:
   ///
   virtual bool addPassesToEmitMC(PassManagerBase &,
                                  MCContext *&,
+                                 raw_ostream &,
                                  CodeGenOpt::Level,
                                  bool = true) {
     return true;
@@ -324,6 +342,7 @@ public:
   ///
   virtual bool addPassesToEmitMC(PassManagerBase &PM,
                                  MCContext *&Ctx,
+                                 raw_ostream &OS,
                                  CodeGenOpt::Level OptLevel,
                                  bool DisableVerify = true);
 
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 97ceffd..62190c1 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -157,6 +157,11 @@ namespace llvm {
   /// wth earlier copy coalescing.
   extern bool StrongPHIElim;
 
+  /// getTrapFunctionName - If this returns a non-empty string, this means isel
+  /// should lower Intrinsic::trap to a call to the specified function name
+  /// instead of an ISD::TRAP node.
+  extern StringRef getTrapFunctionName();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 121091c..205e76f 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -46,6 +46,7 @@ struct TargetRegisterDesc {
   const unsigned *Overlaps;     // Overlapping registers, described above
   const unsigned *SubRegs;      // Sub-register set, described above
   const unsigned *SuperRegs;    // Super-register set, described above
+  unsigned CostPerUse;          // Extra cost of instructions using register.
 };
 
 class TargetRegisterClass {
@@ -426,6 +427,12 @@ public:
     return get(RegNo).Name;
   }
 
+  /// getCostPerUse - Return the additional cost of using this register instead
+  /// of other registers in its class.
+  unsigned getCostPerUse(unsigned RegNo) const {
+    return get(RegNo).CostPerUse;
+  }
+
   /// getNumRegs - Return the number of registers this target has (useful for
   /// sizing arrays holding per register information)
   unsigned getNumRegs() const {
@@ -588,11 +595,32 @@ public:
   }
 
   /// getCrossCopyRegClass - Returns a legal register class to copy a register
-  /// in the specified class to or from. Returns NULL if it is possible to copy
-  /// between a two registers of the specified class.
+  /// in the specified class to or from. If it is possible to copy the register
+  /// directly without using a cross register class copy, return the specified
+  /// RC. Returns NULL if it is not possible to copy between a two registers of
+  /// the specified class.
   virtual const TargetRegisterClass *
   getCrossCopyRegClass(const TargetRegisterClass *RC) const {
-    return NULL;
+    return RC;
+  }
+
+  /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+  /// legal to use in the current sub-target and has the same spill size.
+  /// The returned register class can be used to create virtual registers which
+  /// means that all its registers can be copied and spilled.
+  virtual const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+    /// The default implementation is very conservative and doesn't allow the
+    /// register allocator to inflate register classes.
+    return RC;
+  }
+
+  /// getRegPressureLimit - Return the register pressure "high water mark" for
+  /// the specific register class. The scheduler is in high register pressure
+  /// mode (for the specific register class) if it goes over the limit.
+  virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const {
+    return 0;
   }
 
   /// getAllocationOrder - Returns the register allocation order for a specified
@@ -614,6 +642,14 @@ public:
     return 0;
   }
 
+  /// avoidWriteAfterWrite - Return true if the register allocator should avoid
+  /// writing a register from RC in two consecutive instructions.
+  /// This can avoid pipeline stalls on certain architectures.
+  /// It does cause increased register pressure, though.
+  virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+    return false;
+  }
+
   /// UpdateRegAllocHint - A callback to allow target a chance to update
   /// register allocation hints when a register is "changed" (e.g. coalesced)
   /// to another register. e.g. On ARM, some virtual registers should target
@@ -631,6 +667,13 @@ public:
     return false;
   }
 
+  /// useFPForScavengingIndex - returns true if the target wants to use
+  /// frame pointer based accesses to spill to the scavenger emergency spill
+  /// slot.
+  virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
+    return true;
+  }
+
   /// requiresFrameIndexScavenging - returns true if the target requires post
   /// PEI scavenging of registers for materializing frame index constants.
   virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index f851ad0..a464822 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -43,7 +43,7 @@ namespace llvm {
 
   MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                 bool isVerboseAsm,
-                                bool useLoc,
+                                bool useLoc, bool useCFI,
                                 MCInstPrinter *InstPrint,
                                 MCCodeEmitter *CE,
                                 TargetAsmBackend *TAB,
@@ -78,6 +78,7 @@ namespace llvm {
                                                 TargetMachine &TM);
     typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
     typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
+                                                  TargetMachine &TM,
                                                   unsigned SyntaxVariant,
                                                   const MCAsmInfo &MAI);
     typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T,
@@ -95,6 +96,7 @@ namespace llvm {
                                              formatted_raw_ostream &OS,
                                              bool isVerboseAsm,
                                              bool useLoc,
+                                             bool useCFI,
                                              MCInstPrinter *InstPrint,
                                              MCCodeEmitter *CE,
                                              TargetAsmBackend *TAB,
@@ -286,11 +288,12 @@ namespace llvm {
       return MCDisassemblerCtorFn(*this);
     }
 
-    MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
+    MCInstPrinter *createMCInstPrinter(TargetMachine &TM,
+                                       unsigned SyntaxVariant,
                                        const MCAsmInfo &MAI) const {
       if (!MCInstPrinterCtorFn)
         return 0;
-      return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI);
+      return MCInstPrinterCtorFn(*this, TM, SyntaxVariant, MAI);
     }
 
 
@@ -327,12 +330,13 @@ namespace llvm {
                                   formatted_raw_ostream &OS,
                                   bool isVerboseAsm,
                                   bool useLoc,
+                                  bool useCFI,
                                   MCInstPrinter *InstPrint,
                                   MCCodeEmitter *CE,
                                   TargetAsmBackend *TAB,
                                   bool ShowInst) const {
       // AsmStreamerCtorFn is default to llvm::createAsmStreamer
-      return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc,
+      return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc, useCFI,
                                InstPrint, CE, TAB, ShowInst);
     }
 
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
index 1891f87..c5ab90b 100644
--- a/include/llvm/Target/TargetSelect.h
+++ b/include/llvm/Target/TargetSelect.h
@@ -120,6 +120,19 @@ namespace llvm {
     return true;
 #endif
   }  
+
+  /// InitializeNativeTargetAsmParser - The main program should call
+  /// this function to initialize the native target asm parser.
+  inline bool InitializeNativeTargetAsmParser() {
+  // If we have a native target, initialize the corresponding asm parser.
+#ifdef LLVM_NATIVE_ASMPARSER
+    LLVM_NATIVE_ASMPARSER();
+    return false;
+#else
+    return true;
+#endif
+  }  
+
 }
 
 #endif
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index c9be40d..ff8d07d 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -490,6 +490,18 @@ class SDNodeXForm<SDNode opc, code xformFunction> {
 
 def NOOP_SDNodeXForm : SDNodeXForm<imm, [{}]>;
 
+//===----------------------------------------------------------------------===//
+// PatPred Subclasses.
+//
+// These allow specifying different sorts of predicates that control whether a
+// node is matched.
+//
+class PatPred;
+
+class CodePatPred<code predicate> : PatPred {
+  code PredicateCode = predicate;
+}
+
 
 //===----------------------------------------------------------------------===//
 // Selection DAG Pattern Fragments.
@@ -507,7 +519,8 @@ class PatFrag<dag ops, dag frag, code pred = [{}],
               SDNodeXForm xform = NOOP_SDNodeXForm> : SDPatternOperator {
   dag Operands = ops;
   dag Fragment = frag;
-  code Predicate = pred;
+  code PredicateCode = pred;
+  code ImmediateCode = [{}];
   SDNodeXForm OperandTransform = xform;
 }
 
@@ -516,6 +529,27 @@ class PatFrag<dag ops, dag frag, code pred = [{}],
 class PatLeaf<dag frag, code pred = [{}], SDNodeXForm xform = NOOP_SDNodeXForm>
  : PatFrag<(ops), frag, pred, xform>;
 
+
+// ImmLeaf is a pattern fragment with a constraint on the immediate.  The
+// constraint is a function that is run on the immediate (always with the value
+// sign extended out to an int64_t) as Imm.  For example:
+//
+//  def immSExt8 : ImmLeaf<i16, [{ return (char)Imm == Imm; }]>;
+//
+// this is a more convenient form to match 'imm' nodes in than PatLeaf and also
+// is preferred over using PatLeaf because it allows the code generator to
+// reason more about the constraint.
+//
+// If FastIsel should ignore all instructions that have an operand of this type,
+// the FastIselShouldIgnore flag can be set.  This is an optimization to reduce
+// the code size of the generated fast instruction selector.
+class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
+  : PatFrag<(ops), (vt imm), [{}], xform> {
+  let ImmediateCode = pred;
+  bit FastIselShouldIgnore = 0;
+}
+
+
 // Leaf fragments.
 
 def vtInt      : PatLeaf<(vt),  [{ return N->getVT().isInteger(); }]>;
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 1239881..d12fd1d 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -152,7 +152,6 @@ ModulePass *createDeadArgHackingPass();
 /// equal to maxElements (maxElements == 0 means always promote).
 ///
 Pass *createArgumentPromotionPass(unsigned maxElements = 3);
-Pass *createStructRetPromotionPass();
 
 //===----------------------------------------------------------------------===//
 /// createIPConstantPropagationPass - This pass propagates constants from call
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index aa9873f..088775a 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -17,7 +17,6 @@
 namespace llvm {
 
 class ModulePass;
-class FunctionPass;
 
 // Insert edge profiling instrumentation
 ModulePass *createEdgeProfilerPass();
@@ -28,6 +27,9 @@ ModulePass *createOptimalEdgeProfilerPass();
 // Insert path profiling instrumentation
 ModulePass *createPathProfilerPass();
 
+// Insert GCOV profiling instrumentation
+ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 6f2a38e..de46a8d 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -128,7 +128,7 @@ Pass *createLoopInstSimplifyPass();
 //
 // LoopUnroll - This pass is a simple loop unrolling pass.
 //
-Pass *createLoopUnrollPass();
+Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1, int AllowPartial = -1);
 
 //===----------------------------------------------------------------------===//
 //
@@ -301,12 +301,6 @@ FunctionPass *createSimplifyLibCallsPass();
 
 //===----------------------------------------------------------------------===//
 //
-/// createSimplifyHalfPowrLibCallsPass - This is an experimental pass that
-/// optimizes specific half_pow functions.
-FunctionPass *createSimplifyHalfPowrLibCallsPass();
-
-//===----------------------------------------------------------------------===//
-//
 // CodeGenPrepare - This pass prepares a function for instruction selection.
 //
 FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0);
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 5335860..90eabef 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -19,6 +19,7 @@
 
 #include "llvm/BasicBlock.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
 
@@ -181,6 +182,10 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
 ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
                                        BasicBlock *Pred);
 
+/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a 
+/// given basic block.
+DebugLoc GetFirstDebugLocInBasicBlock(const BasicBlock *BB);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 24ebb10..853de2d 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -207,7 +207,7 @@ public:
 ///
 /// Note that this only does one level of inlining.  For example, if the
 /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-/// exists in the instruction stream.  Similiarly this will inline a recursive
+/// exists in the instruction stream.  Similarly this will inline a recursive
 /// function by one level.
 ///
 bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI);
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 2823fbb..e61dcb3 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -19,14 +19,19 @@ namespace llvm {
 
 class User;
 class BasicBlock;
+class Function;
 class BranchInst;
 class Instruction;
+class DbgDeclareInst;
+class StoreInst;
+class LoadInst;
 class Value;
 class Pass;
 class PHINode;
 class AllocaInst;
 class ConstantExpr;
 class TargetData;
+class DIBuilder;
 
 template<typename T> class SmallVectorImpl;
   
@@ -69,10 +74,6 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN);
 ///
 /// This returns true if it changed the code, note that it can delete
 /// instructions in other blocks as well in this block.
-///
-/// WARNING: Do not use this function on unreachable blocks, as recursive
-/// simplification is not able to handle corner-case scenarios that can
-/// arise in them.
 bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0);
     
 //===----------------------------------------------------------------------===//
@@ -157,6 +158,24 @@ static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) {
   return getOrEnforceKnownAlignment(V, 0, TD);
 }
 
+///===---------------------------------------------------------------------===//
+///  Dbg Intrinsic utilities
+///
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                     StoreInst *SI, DIBuilder &Builder);
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                     LoadInst *LI, DIBuilder &Builder);
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool LowerDbgDeclare(Function &F);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
index 9fdcb98..89ad534 100644
--- a/include/llvm/TypeSymbolTable.h
+++ b/include/llvm/TypeSymbolTable.h
@@ -133,7 +133,7 @@ private:
   /// is refined.
   virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
 
-  /// This function markes a type as being concrete (defined).
+  /// This function marks a type as being concrete (defined).
   virtual void typeBecameConcrete(const DerivedType *AbsTy);
 
 /// @}
diff --git a/include/llvm/User.h b/include/llvm/User.h
index 1363495..3f9c28e 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -95,11 +95,11 @@ public:
     OperandList[i] = Val;
   }
   const Use &getOperandUse(unsigned i) const {
-    assert(i < NumOperands && "getOperand() out of range!");
+    assert(i < NumOperands && "getOperandUse() out of range!");
     return OperandList[i];
   }
   Use &getOperandUse(unsigned i) {
-    assert(i < NumOperands && "getOperand() out of range!");
+    assert(i < NumOperands && "getOperandUse() out of range!");
     return OperandList[i];
   }
   
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 130e273..3a1c3ca 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -51,8 +51,8 @@ class MDNode;
 /// This is a very important LLVM class. It is the base class of all values 
 /// computed by a program that may be used as operands to other values. Value is
 /// the super class of other important classes such as Instruction and Function.
-/// All Values have a Type. Type is not a subclass of Value. All types can have
-/// a name and they should belong to some Module. Setting the name on the Value
+/// All Values have a Type. Type is not a subclass of Value. Some values can
+/// have a name and they belong to some Module.  Setting the name on the Value
 /// automatically updates the module's symbol table.
 ///
 /// Every value has a "use list" that keeps track of which other Values are
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index be02ddb..c189a00 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -86,14 +86,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
 
   if (onlyAccessesArgPointees(MRB)) {
     bool doesAlias = false;
-    if (doesAccessArgPointees(MRB))
+    if (doesAccessArgPointees(MRB)) {
+      MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-           AI != AE; ++AI)
-        if (!isNoAlias(Location(*AI), Loc)) {
+           AI != AE; ++AI) {
+        const Value *Arg = *AI;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CSLoc(Arg, UnknownSize, CSTag);
+        if (!isNoAlias(CSLoc, Loc)) {
           doesAlias = true;
           break;
         }
-
+      }
+    }
     if (!doesAlias)
       return NoModRef;
   }
@@ -138,13 +144,19 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   // CS2's arguments.
   if (onlyAccessesArgPointees(CS2B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    if (doesAccessArgPointees(CS2B))
+    if (doesAccessArgPointees(CS2B)) {
+      MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
            I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
-        R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+        const Value *Arg = *I;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CS2Loc(Arg, UnknownSize, CS2Tag);
+        R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
         if (R == Mask)
           break;
       }
+    }
     return R;
   }
 
@@ -152,13 +164,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   // any of the memory referenced by CS1's arguments. If not, return NoModRef.
   if (onlyAccessesArgPointees(CS1B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    if (doesAccessArgPointees(CS1B))
+    if (doesAccessArgPointees(CS1B)) {
+      MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
-           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
-        if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+        const Value *Arg = *I;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CS1Loc(Arg, UnknownSize, CS1Tag);
+        if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
           R = Mask;
           break;
         }
+      }
+    }
     if (R == NoModRef)
       return R;
   }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 3a46976d..2ed6949 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -602,6 +602,10 @@ void AliasSetTracker::ASTCallbackVH::deleted() {
   // this now dangles!
 }
 
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+  AST->copyValue(getValPtr(), V);
+}
+
 AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
   : CallbackVH(V), AST(ast) {}
 
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 1af1c35..6ebe100 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -43,14 +43,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
-  initializeLiveValuesPass(Registry);
   initializeLoopDependenceAnalysisPass(Registry);
   initializeLoopInfoPass(Registry);
   initializeMemDepPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
   initializePostDominatorTreePass(Registry);
-  initializePostDominanceFrontierPass(Registry);
   initializeProfileEstimatorPassPass(Registry);
   initializeNoProfileInfoPass(Registry);
   initializeNoPathProfileInfoPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f7bcd9e..f1bb8a3 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -350,7 +350,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       Scale *= IndexScale.getSExtValue();
       
       
-      // If we already had an occurrance of this index variable, merge this
+      // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
@@ -779,6 +779,26 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
         return NoModRef;
       break;
     }
+    case Intrinsic::arm_neon_vld1: {
+      // LLVM's vld1 and vst1 intrinsics currently only support a single
+      // vector register.
+      uint64_t Size =
+        TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+      if (isNoAlias(Location(II->getArgOperand(0), Size,
+                             II->getMetadata(LLVMContext::MD_tbaa)),
+                    Loc))
+        return NoModRef;
+      break;
+    }
+    case Intrinsic::arm_neon_vst1: {
+      uint64_t Size =
+        TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+      if (isNoAlias(Location(II->getArgOperand(0), Size,
+                             II->getMetadata(LLVMContext::MD_tbaa)),
+                    Loc))
+        return NoModRef;
+      break;
+    }
     }
 
   // The AliasAnalysis base class has some smarts, lets use them.
@@ -883,7 +903,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
     return MustAlias;
 
-  // If there is a difference betwen the pointers, but the difference is
+  // If there is a difference between the pointers, but the difference is
   // less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.
   if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a738fa..6be5617 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -9,11 +9,11 @@ add_llvm_library(LLVMAnalysis
   CFGPrinter.cpp
   CaptureTracking.cpp
   ConstantFolding.cpp
+  DIBuilder.cpp
   DbgInfoPrinter.cpp
   DebugInfo.cpp
-  DIBuilder.cpp
-  DominanceFrontier.cpp
   DomPrinter.cpp
+  DominanceFrontier.cpp
   IVUsers.cpp
   InlineCost.cpp
   InstCount.cpp
@@ -24,7 +24,6 @@ add_llvm_library(LLVMAnalysis
   LibCallAliasAnalysis.cpp
   LibCallSemantics.cpp
   Lint.cpp
-  LiveValues.cpp
   Loads.cpp
   LoopDependenceAnalysis.cpp
   LoopInfo.cpp
@@ -33,11 +32,11 @@ add_llvm_library(LLVMAnalysis
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
   ModuleDebugInfoPrinter.cpp
+  NoAliasAnalysis.cpp
+  PHITransAddr.cpp
   PathNumbering.cpp
   PathProfileInfo.cpp
   PathProfileVerifier.cpp
-  NoAliasAnalysis.cpp
-  PHITransAddr.cpp
   PostDominators.cpp
   ProfileEstimatorPass.cpp
   ProfileInfo.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 42a54d9..b2c27d1 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index cd8d52c..5de2b04 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -23,6 +23,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/Operator.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallVector.h"
@@ -1048,11 +1049,12 @@ llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
-  case Intrinsic::uadd_with_overflow:
-  case Intrinsic::usub_with_overflow:
   case Intrinsic::sadd_with_overflow:
+  case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
   case Intrinsic::smul_with_overflow:
+  case Intrinsic::umul_with_overflow:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::x86_sse_cvtss2si:
@@ -1362,7 +1364,8 @@ llvm::ConstantFoldCall(Function *F,
         case Intrinsic::uadd_with_overflow:
         case Intrinsic::ssub_with_overflow:
         case Intrinsic::usub_with_overflow:
-        case Intrinsic::smul_with_overflow: {
+        case Intrinsic::smul_with_overflow:
+        case Intrinsic::umul_with_overflow: {
           APInt Res;
           bool Overflow;
           switch (F->getIntrinsicID()) {
@@ -1382,6 +1385,9 @@ llvm::ConstantFoldCall(Function *F,
           case Intrinsic::smul_with_overflow:
             Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
             break;
+          case Intrinsic::umul_with_overflow:
+            Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+            break;
           }
           Constant *Ops[] = {
             ConstantInt::get(F->getContext(), Res),
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index 590a9c1..dc98c9e 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -50,7 +50,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
     MDString::get(VMContext, Flags),
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
   };
-  TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
 }
 
 /// createFile - Create a file descriptor to hold debugging information
@@ -63,7 +63,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
     MDString::get(VMContext, Directory),
     TheCU
   };
-  return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIFile(MDNode::get(VMContext, Elts));
 }
 
 /// createEnumerator - Create a single enumerator value.
@@ -73,7 +73,7 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
     MDString::get(VMContext, Name),
     ConstantInt::get(Type::getInt64Ty(VMContext), Val)
   };
-  return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIEnumerator(MDNode::get(VMContext, Elts));
 }
 
 /// createBasicType - Create debugging information entry for a basic 
@@ -95,7 +95,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
     ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createQaulifiedType - Create debugging information entry for a qualified
@@ -114,7 +114,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     FromTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createPointerType - Create debugging information entry for a pointer.
@@ -133,7 +133,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     PointeeTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createReferenceType - Create debugging information entry for a reference.
@@ -151,7 +151,7 @@ DIType DIBuilder::createReferenceType(DIType RTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     RTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createTypedef - Create debugging information entry for a typedef.
@@ -171,7 +171,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     Ty
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createFriend - Create debugging information entry for a 'friend'.
@@ -191,7 +191,7 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     FriendTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createInheritance - Create debugging information entry to establish
@@ -211,7 +211,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     BaseTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createMemberType - Create debugging information entry for a member.
@@ -233,7 +233,36 @@ DIType DIBuilder::createMemberType(StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     Ty
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name, 
+                                 DIFile File, unsigned LineNumber, 
+                                 uint64_t SizeInBits, uint64_t AlignInBits,
+                                 uint64_t OffsetInBits, unsigned Flags, 
+                                 DIType Ty, StringRef PropertyName,
+                                 StringRef GetterName, StringRef SetterName,
+                                 unsigned PropertyAttributes) {
+  // TAG_member is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File, // Or TheCU ? Ty ?
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    Ty,
+    MDString::get(VMContext, PropertyName),
+    MDString::get(VMContext, GetterName),
+    MDString::get(VMContext, SetterName),
+    ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+  };
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createClassType - Create debugging information entry for a class.
@@ -260,7 +289,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
     VTableHoder,
     TemplateParams
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createTemplateTypeParameter - Create debugging information for template
@@ -278,8 +307,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
   };
-  return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], 
-                                             array_lengthof(Elts)));
+  return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
 }
 
 /// createTemplateValueParameter - Create debugging information for template
@@ -299,8 +327,7 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
   };
-  return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], 
-                                              array_lengthof(Elts)));
+  return DITemplateValueParameter(MDNode::get(VMContext, Elts));
 }
 
 /// createStructType - Create debugging information entry for a struct.
@@ -325,7 +352,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createUnionType - Create debugging information entry for an union.
@@ -350,7 +377,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createSubroutineType - Create subroutine type.
@@ -371,7 +398,7 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createEnumerationType - Create debugging information entry for an 
@@ -396,7 +423,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
   NMD->addOperand(Node);
   return DIType(Node);
@@ -421,7 +448,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createVectorType - Create debugging information entry for a vector.
@@ -443,7 +470,7 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createArtificialType - Create a new DIType with "artificial" flag set.
@@ -467,7 +494,7 @@ DIType DIBuilder::createArtificialType(DIType Ty) {
   // Flags are stored at this slot.
   Elts[8] =  ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
 
-  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// retainType - Retain DIType in a module even if it is not referenced 
@@ -483,7 +510,7 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
   Value *Elts[] = { 
     GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) 
   };
-  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+  return DIDescriptor(MDNode::get(VMContext, Elts));
 }
 
 /// createTemporaryType - Create a temporary forward-declared type.
@@ -491,7 +518,7 @@ DIType DIBuilder::createTemporaryType() {
   // Give the temporary MDNode a tag. It doesn't matter what tag we
   // use here as long as DIType accepts it.
   Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
   return DIType(Node);
 }
 
@@ -505,17 +532,17 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
     NULL,
     F
   };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
   return DIType(Node);
 }
 
 /// getOrCreateArray - Get a DIArray, create one if required.
-DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) {
-  if (NumElements == 0) {
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+  if (Elements.empty()) {
     Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
-    return DIArray(MDNode::get(VMContext, &Null, 1));
+    return DIArray(MDNode::get(VMContext, Null));
   }
-  return DIArray(MDNode::get(VMContext, Elements, NumElements));
+  return DIArray(MDNode::get(VMContext, Elements));
 }
 
 /// getOrCreateSubrange - Create a descriptor for a value range.  This
@@ -527,7 +554,7 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
     ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
   };
 
-  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+  return DISubrange(MDNode::get(VMContext, Elts));
 }
 
 /// createGlobalVariable - Create a new descriptor for the specified global.
@@ -548,7 +575,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
     ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
     Val
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
   NMD->addOperand(Node);
@@ -575,7 +602,7 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
     Val
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
   NMD->addOperand(Node);
@@ -586,17 +613,18 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
 DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
                                           StringRef Name, DIFile File,
                                           unsigned LineNo, DIType Ty, 
-                                          bool AlwaysPreserve, unsigned Flags) {
+                                          bool AlwaysPreserve, unsigned Flags,
+                                          unsigned ArgNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
     Scope,
     MDString::get(VMContext, Name),
     File,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
     Ty,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   if (AlwaysPreserve) {
     // The optimizer may remove local variable. If there is an interest
     // to preserve variable info in such situation then stash it in a
@@ -619,18 +647,19 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
 DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
                                             StringRef Name, DIFile F,
                                             unsigned LineNo,
-                                            DIType Ty, Value *const *Addr,
-                                            unsigned NumAddr) {
+                                            DIType Ty, ArrayRef<Value *> Addr,
+                                            unsigned ArgNo) {
   SmallVector<Value *, 15> Elts;
   Elts.push_back(GetTagConstant(VMContext, Tag));
   Elts.push_back(Scope);
   Elts.push_back(MDString::get(VMContext, Name));
   Elts.push_back(F);
-  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
   Elts.push_back(Ty);
-  Elts.append(Addr, Addr+NumAddr);
+  Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  Elts.append(Addr.begin(), Addr.end());
 
-  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+  return DIVariable(MDNode::get(VMContext, Elts));
 }
 
 /// createFunction - Create a new descriptor for the specified function.
@@ -641,8 +670,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
                                        DIType Ty,
                                        bool isLocalToUnit, bool isDefinition,
                                        unsigned Flags, bool isOptimized,
-                                       Function *Fn) {
-
+                                       Function *Fn,
+                                       MDNode *TParams,
+                                       MDNode *Decl) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -660,9 +690,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
+    Fn,
+    TParams,
+    Decl
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
 
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
@@ -682,7 +714,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
                                      MDNode *VTableHolder,
                                      unsigned Flags,
                                      bool isOptimized,
-                                     Function *Fn) {
+                                     Function *Fn,
+                                     MDNode *TParam) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -700,9 +733,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
     VTableHolder,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
+    Fn,
+    TParam,
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
 
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
@@ -721,7 +755,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
   };
-  return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DINameSpace(MDNode::get(VMContext, Elts));
 }
 
 DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
@@ -736,7 +770,7 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
   };
-  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DILexicalBlock(MDNode::get(VMContext, Elts));
 }
 
 /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
@@ -747,7 +781,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }
 
@@ -759,7 +793,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
 
   // If this block already has a terminator then insert this intrinsic
   // before the terminator.
@@ -778,7 +812,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+  Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
@@ -793,7 +827,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+  Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 9db1456..67f8147 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -725,484 +725,6 @@ void DIVariable::dump() const {
   print(dbgs()); dbgs() << '\n';
 }
 
-//===----------------------------------------------------------------------===//
-// DIFactory: Basic Helpers
-//===----------------------------------------------------------------------===//
-
-DIFactory::DIFactory(Module &m)
-  : M(m), VMContext(M.getContext()), DeclareFn(0), ValueFn(0) {}
-
-Constant *DIFactory::GetTagConstant(unsigned TAG) {
-  assert((TAG & LLVMDebugVersionMask) == 0 &&
-         "Tag too large for debug encoding!");
-  return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion);
-}
-
-//===----------------------------------------------------------------------===//
-// DIFactory: Primary Constructors
-//===----------------------------------------------------------------------===//
-
-/// GetOrCreateArray - Create an descriptor for an array of descriptors.
-/// This implicitly uniques the arrays created.
-DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
-  if (NumTys == 0) {
-    Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
-    return DIArray(MDNode::get(VMContext, &Null, 1));
-  }
-
-  SmallVector<Value *, 16> Elts(Tys, Tys+NumTys);
-  return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-/// GetOrCreateSubrange - Create a descriptor for a value range.  This
-/// implicitly uniques the values returned.
-DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subrange_type),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
-  };
-
-  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
-}
-
-/// CreateUnspecifiedParameter - Create unspeicified type descriptor
-/// for the subroutine type.
-DIDescriptor DIFactory::CreateUnspecifiedParameter() {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_unspecified_parameters)
-  };
-  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
-}
-
-/// CreateCompileUnit - Create a new descriptor for the specified compile
-/// unit.  Note that this does not unique compile units within the module.
-DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
-                                           StringRef Filename,
-                                           StringRef Directory,
-                                           StringRef Producer,
-                                           bool isMain,
-                                           bool isOptimized,
-                                           StringRef Flags,
-                                           unsigned RunTimeVer) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_compile_unit),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    ConstantInt::get(Type::getInt32Ty(VMContext), LangID),
-    MDString::get(VMContext, Filename),
-    MDString::get(VMContext, Directory),
-    MDString::get(VMContext, Producer),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isMain),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    MDString::get(VMContext, Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
-  };
-
-  return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-/// CreateFile -  Create a new descriptor for the specified file.
-DIFile DIFactory::CreateFile(StringRef Filename,
-                             StringRef Directory,
-                             DICompileUnit CU) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_file_type),
-    MDString::get(VMContext, Filename),
-    MDString::get(VMContext, Directory),
-    CU
-  };
-
-  return DIFile(MDNode::get(VMContext, &Elts[0], 4));
-}
-
-/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_enumerator),
-    MDString::get(VMContext, Name),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Val)
-  };
-  return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3));
-}
-
-
-/// CreateBasicType - Create a basic type like int, float, etc.
-DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
-                                       StringRef Name,
-                                       DIFile F,
-                                       unsigned LineNumber,
-                                       uint64_t SizeInBits,
-                                       uint64_t AlignInBits,
-                                       uint64_t OffsetInBits, unsigned Flags,
-                                       unsigned Encoding) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_base_type),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
-  };
-  return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateBasicType - Create a basic type like int, float, etc.
-DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
-                                         StringRef Name,
-                                         DIFile F,
-                                         unsigned LineNumber,
-                                         Constant *SizeInBits,
-                                         Constant *AlignInBits,
-                                         Constant *OffsetInBits, unsigned Flags,
-                                         unsigned Encoding) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_base_type),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
-  };
-  return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-/// CreateArtificialType - Create a new DIType with "artificial" flag set.
-DIType DIFactory::CreateArtificialType(DIType Ty) {
-  if (Ty.isArtificial())
-    return Ty;
-
-  SmallVector<Value *, 9> Elts;
-  MDNode *N = Ty;
-  assert (N && "Unexpected input DIType!");
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    if (Value *V = N->getOperand(i))
-      Elts.push_back(V);
-    else
-      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
-  }
-
-  unsigned CurFlags = Ty.getFlags();
-  CurFlags = CurFlags | DIType::FlagArtificial;
-
-  // Flags are stored at this slot.
-  Elts[8] =  ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
-
-  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-/// CreateDerivedType - Create a derived type like const qualified type,
-/// pointer, typedef, etc.
-DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
-                                           DIDescriptor Context,
-                                           StringRef Name,
-                                           DIFile F,
-                                           unsigned LineNumber,
-                                           uint64_t SizeInBits,
-                                           uint64_t AlignInBits,
-                                           uint64_t OffsetInBits,
-                                           unsigned Flags,
-                                           DIType DerivedFrom) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-  };
-  return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateDerivedType - Create a derived type like const qualified type,
-/// pointer, typedef, etc.
-DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
-                                             DIDescriptor Context,
-                                             StringRef Name,
-                                             DIFile F,
-                                             unsigned LineNumber,
-                                             Constant *SizeInBits,
-                                             Constant *AlignInBits,
-                                             Constant *OffsetInBits,
-                                             unsigned Flags,
-                                             DIType DerivedFrom) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-  };
-  return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateCompositeType - Create a composite type like array, struct, etc.
-DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
-                                               DIDescriptor Context,
-                                               StringRef Name,
-                                               DIFile F,
-                                               unsigned LineNumber,
-                                               uint64_t SizeInBits,
-                                               uint64_t AlignInBits,
-                                               uint64_t OffsetInBits,
-                                               unsigned Flags,
-                                               DIType DerivedFrom,
-                                               DIArray Elements,
-                                               unsigned RuntimeLang,
-                                               MDNode *ContainingType) {
-
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-    Elements,
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
-    ContainingType
-  };
-
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
-  // Create a named metadata so that we do not lose this enum info.
-  if (Tag == dwarf::DW_TAG_enumeration_type) {
-    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
-    NMD->addOperand(Node);
-  }
-  return DICompositeType(Node);
-}
-
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIFactory::CreateTemporaryType() {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = {
-    GetTagConstant(DW_TAG_base_type)
-  };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
-  return DIType(Node);
-}
-
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIFactory::CreateTemporaryType(DIFile F) {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = {
-    GetTagConstant(DW_TAG_base_type),
-    F.getCompileUnit(),
-    NULL,
-    F
-  };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
-  return DIType(Node);
-}
-
-/// CreateCompositeType - Create a composite type like array, struct, etc.
-DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
-                                                 DIDescriptor Context,
-                                                 StringRef Name,
-                                                 DIFile F,
-                                                 unsigned LineNumber,
-                                                 Constant *SizeInBits,
-                                                 Constant *AlignInBits,
-                                                 Constant *OffsetInBits,
-                                                 unsigned Flags,
-                                                 DIType DerivedFrom,
-                                                 DIArray Elements,
-                                                 unsigned RuntimeLang,
-                                                 MDNode *ContainingType) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-    Elements,
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
-    ContainingType
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
-  // Create a named metadata so that we do not lose this enum info.
-  if (Tag == dwarf::DW_TAG_enumeration_type) {
-    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
-    NMD->addOperand(Node);
-  }
-  return DICompositeType(Node);
-}
-
-
-/// CreateSubprogram - Create a new descriptor for the specified subprogram.
-/// See comments in DISubprogram for descriptions of these fields.  This
-/// method does not unique the generated descriptors.
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
-                                         StringRef Name,
-                                         StringRef DisplayName,
-                                         StringRef LinkageName,
-                                         DIFile F,
-                                         unsigned LineNo, DIType Ty,
-                                         bool isLocalToUnit,
-                                         bool isDefinition,
-                                         unsigned VK, unsigned VIndex,
-                                         DIType ContainingType,
-                                         unsigned Flags,
-                                         bool isOptimized,
-                                         Function *Fn) {
-
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subprogram),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
-    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
-    ContainingType,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 17);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
-  return DISubprogram(Node);
-}
-
-/// CreateSubprogramDefinition - Create new subprogram descriptor for the
-/// given declaration.
-DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
-  if (SPDeclaration.isDefinition())
-    return DISubprogram(SPDeclaration);
-
-  MDNode *DeclNode = SPDeclaration;
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subprogram),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    DeclNode->getOperand(2), // Context
-    DeclNode->getOperand(3), // Name
-    DeclNode->getOperand(4), // DisplayName
-    DeclNode->getOperand(5), // LinkageName
-    DeclNode->getOperand(6), // CompileUnit
-    DeclNode->getOperand(7), // LineNo
-    DeclNode->getOperand(8), // Type
-    DeclNode->getOperand(9), // isLocalToUnit
-    ConstantInt::get(Type::getInt1Ty(VMContext), true),
-    DeclNode->getOperand(11), // Virtuality
-    DeclNode->getOperand(12), // VIndex
-    DeclNode->getOperand(13), // Containting Type
-    DeclNode->getOperand(14), // Flags
-    DeclNode->getOperand(15), // isOptimized
-    SPDeclaration.getFunction()
-  };
-  MDNode *Node =MDNode::get(VMContext, &Elts[0], 16);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
-  return DISubprogram(Node);
-}
-
-/// CreateGlobalVariable - Create a new descriptor for the specified global.
-DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                                StringRef DisplayName,
-                                StringRef LinkageName,
-                                DIFile F,
-                                unsigned LineNo, DIType Ty,bool isLocalToUnit,
-                                bool isDefinition, llvm::GlobalVariable *Val) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_variable),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    Val
-  };
-
-  Value *const *Vs = &Elts[0];
-  MDNode *Node = MDNode::get(VMContext,Vs, 12);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
-
-  return DIGlobalVariable(Node);
-}
-
-/// CreateGlobalVariable - Create a new descriptor for the specified constant.
-DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                                StringRef DisplayName,
-                                StringRef LinkageName,
-                                DIFile F,
-                                unsigned LineNo, DIType Ty,bool isLocalToUnit,
-                                bool isDefinition, llvm::Constant *Val) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_variable),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    Val
-  };
-
-  Value *const *Vs = &Elts[0];
-  MDNode *Node = MDNode::get(VMContext,Vs, 12);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
-
-  return DIGlobalVariable(Node);
-}
-
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
 static void fixupObjcLikeName(std::string &Str) {
@@ -1214,19 +736,6 @@ static void fixupObjcLikeName(std::string &Str) {
   }
 }
 
-/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
-/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
-  SmallString<32> Out;
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
-                                      .toStringRef(Out)); 
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
-                                    .toStringRef(Out));
-}
-
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
@@ -1237,178 +746,18 @@ NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
   return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
 }
 
-/// CreateVariable - Create a new descriptor for the specified variable.
-DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
-                                     StringRef Name,
-                                     DIFile F,
-                                     unsigned LineNo,
-                                     DIType Ty, bool AlwaysPreserve,
-                                     unsigned Flags) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 7);
-  if (AlwaysPreserve) {
-    // The optimizer may remove local variable. If there is an interest
-    // to preserve variable info in such situation then stash it in a
-    // named mdnode.
-    DISubprogram Fn(getDISubprogram(Context));
-    StringRef FName = "fn";
-    if (Fn.getFunction())
-      FName = Fn.getFunction()->getName();
-    char One = '\1';
-    if (FName.startswith(StringRef(&One, 1)))
-      FName = FName.substr(1);
-
-
-    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
-    FnLocals->addOperand(Node);
-  }
-  return DIVariable(Node);
-}
-
-
-/// CreateComplexVariable - Create a new descriptor for the specified variable
-/// which has a complex address expression for its address.
-DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                            StringRef Name, DIFile F,
-                                            unsigned LineNo,
-                                            DIType Ty, Value *const *Addr,
-                                            unsigned NumAddr) {
-  SmallVector<Value *, 15> Elts;
-  Elts.push_back(GetTagConstant(Tag));
-  Elts.push_back(Context);
-  Elts.push_back(MDString::get(VMContext, Name));
-  Elts.push_back(F);
-  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
-  Elts.push_back(Ty);
-  Elts.append(Addr, Addr+NumAddr);
-
-  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-
-/// CreateBlock - This creates a descriptor for a lexical block with the
-/// specified parent VMContext.
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
-                                             DIFile F, unsigned LineNo,
-                                             unsigned Col) {
-  // Defeat MDNode uniqing for lexical blocks.
-  static unsigned int unique_id = 0;
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_lexical_block),
-    Context,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Col),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
-  };
-  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6));
-}
-
-/// CreateNameSpace - This creates new descriptor for a namespace
-/// with the specified parent context.
-DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name,
-                                       DIFile F,
-                                       unsigned LineNo) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_namespace),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
-  };
-  return DINameSpace(MDNode::get(VMContext, &Elts[0], 5));
-}
-
-/// CreateLocation - Creates a debug info location.
-DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
-                                     DIScope S, DILocation OrigLoc) {
-  Value *Elts[] = {
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
-    S,
-    OrigLoc,
-  };
-  return DILocation(MDNode::get(VMContext, &Elts[0], 4));
-}
-
-//===----------------------------------------------------------------------===//
-// DIFactory: Routines for inserting code into a function
-//===----------------------------------------------------------------------===//
-
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      Instruction *InsertBefore) {
-  assert(Storage && "no storage passed to dbg.declare");
-  assert(D.Verify() && "empty DIVariable passed to dbg.declare");
-  if (!DeclareFn)
-    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
-}
-
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      BasicBlock *InsertAtEnd) {
-  assert(Storage && "no storage passed to dbg.declare");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.declare");
-  if (!DeclareFn)
-    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D };
-
-  // If this block already has a terminator then insert this intrinsic
-  // before the terminator.
-  if (TerminatorInst *T = InsertAtEnd->getTerminator())
-    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
-  else
-    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);}
-
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
-                                                DIVariable D,
-                                                Instruction *InsertBefore) {
-  assert(V && "no value passed to dbg.value");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
-  if (!ValueFn)
-    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
-                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
-}
-
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
-                                                DIVariable D,
-                                                BasicBlock *InsertAtEnd) {
-  assert(V && "no value passed to dbg.value");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
-  if (!ValueFn)
-    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
-                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
-}
-
-// RecordType - Record DIType in a module such that it is not lost even if
-// it is not referenced through debug info anchors.
-void DIFactory::RecordType(DIType T) {
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
-  NMD->addOperand(T);
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+  SmallString<32> Out;
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+                                      .toStringRef(Out));
+  
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+                                    .toStringRef(Out));
 }
 
 
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 116aaf4..b226d66 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -602,7 +602,7 @@ void GlobalsModRef::addEscapingUse(Use &U) {
   // For the purposes of this analysis, it is conservatively correct to treat
   // a newly escaping value equivalently to a deleted one.  We could perhaps
   // be more precise by processing the new use and attempting to update our
-  // saved analysis results to accomodate it.
+  // saved analysis results to accommodate it.
   deleteValue(U);
   
   AliasAnalysis::addEscapingUse(U);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index c838218..2cda791 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -83,7 +84,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
     return false;   // Void and FP expressions cannot be reduced.
 
   // LSR is not APInt clean, do not touch integers bigger than 64-bits.
-  if (SE->getTypeSizeInBits(I->getType()) > 64)
+  // Also avoid creating IVs of non-native types. For example, we don't want a
+  // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+  uint64_t Width = SE->getTypeSizeInBits(I->getType());
+  if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;
 
   if (!Processed.insert(I))
@@ -167,6 +171,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
   SE = &getAnalysis<ScalarEvolution>();
+  TD = getAnalysisIfAvailable<TargetData>();
 
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 47f91cf..a820ecf 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -501,7 +501,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
     return InlineCost::getAlways();
     
   if (CalleeFI->Metrics.usesDynamicAlloca) {
-    // Get infomation about the caller.
+    // Get information about the caller.
     FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
 
     // If we haven't calculated this information yet, do so now.
@@ -549,7 +549,7 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
 
   int Cost = 0;
   
-  // Look at the orginal size of the callee.  Each instruction counts as 5.
+  // Look at the original size of the callee.  Each instruction counts as 5.
   Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
 
   // Offset that with the amount of code that can be constant-folded
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 982dacb..9d6d339 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,11 +18,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
+#include "llvm/Operator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetData.h"
@@ -899,6 +901,111 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
   return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
 }
 
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+                          const TargetData *TD, const DominatorTree *DT,
+                          unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  bool isSigned = Opcode == Instruction::SRem;
+
+  // X % undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // undef % X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // 0 % X -> 0, we don't need to preserve faults!
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X % 0 -> undef, we don't need to preserve faults!
+  if (match(Op1, m_Zero()))
+    return UndefValue::get(Op0->getType());
+
+  // X % 1 -> 0
+  if (match(Op1, m_One()))
+    return Constant::getNullValue(Op0->getType());
+
+  if (Op0->getType()->isIntegerTy(1))
+    // It can't be remainder by zero, hence it must be remainder by one.
+    return Constant::getNullValue(Op0->getType());
+
+  // X % X -> 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
+                               const DominatorTree *, unsigned) {
+  // undef % X -> undef    (the undef could be a snan).
+  if (match(Op0, m_Undef()))
+    return Op0;
+
+  // X % undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
 /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
@@ -1343,7 +1450,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // the compare, and if only one of them is then we moved it to RHS already.
   if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
                                isa<ConstantPointerNull>(RHS)))
-    // We already know that LHS != LHS.
+    // We already know that LHS != RHS.
     return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
 
   // If we are comparing with zero then try hard since this is a common case.
@@ -1399,40 +1506,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
   // See if we are doing a comparison with a constant integer.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
-    switch (Pred) {
-    default: break;
-    case ICmpInst::ICMP_UGT:
-      if (CI->isMaxValue(false))                 // A >u MAX -> FALSE
-        return ConstantInt::getFalse(CI->getContext());
-      break;
-    case ICmpInst::ICMP_UGE:
-      if (CI->isMinValue(false))                 // A >=u MIN -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
-      break;
-    case ICmpInst::ICMP_ULT:
-      if (CI->isMinValue(false))                 // A <u MIN -> FALSE
-        return ConstantInt::getFalse(CI->getContext());
-      break;
-    case ICmpInst::ICMP_ULE:
-      if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
-      break;
-    case ICmpInst::ICMP_SGT:
-      if (CI->isMaxValue(true))                  // A >s MAX -> FALSE
-        return ConstantInt::getFalse(CI->getContext());
-      break;
-    case ICmpInst::ICMP_SGE:
-      if (CI->isMinValue(true))                  // A >=s MIN -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
-      break;
-    case ICmpInst::ICMP_SLT:
-      if (CI->isMinValue(true))                  // A <s MIN -> FALSE
-        return ConstantInt::getFalse(CI->getContext());
-      break;
-    case ICmpInst::ICMP_SLE:
-      if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
-      break;
+    // Rule out tautological comparisons (eg., ult 0 or uge 0).
+    ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+    if (RHS_CR.isEmptySet())
+      return ConstantInt::getFalse(CI->getContext());
+    if (RHS_CR.isFullSet())
+      return ConstantInt::getTrue(CI->getContext());
+
+    // Many binary operators with constant RHS have easy to compute constant
+    // range.  Use them to check whether the comparison is a tautology.
+    uint32_t Width = CI->getBitWidth();
+    APInt Lower = APInt(Width, 0);
+    APInt Upper = APInt(Width, 0);
+    ConstantInt *CI2;
+    if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+      // 'urem x, CI2' produces [0, CI2).
+      Upper = CI2->getValue();
+    } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+      // 'srem x, CI2' produces (-|CI2|, |CI2|).
+      Upper = CI2->getValue().abs();
+      Lower = (-Upper) + 1;
+    } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+      // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+      APInt NegOne = APInt::getAllOnesValue(Width);
+      if (!CI2->isZero())
+        Upper = NegOne.udiv(CI2->getValue()) + 1;
+    } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+      // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      APInt Val = CI2->getValue().abs();
+      if (!Val.isMinValue()) {
+        Lower = IntMin.sdiv(Val);
+        Upper = IntMax.sdiv(Val) + 1;
+      }
+    } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+      // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+      APInt NegOne = APInt::getAllOnesValue(Width);
+      if (CI2->getValue().ult(Width))
+        Upper = NegOne.lshr(CI2->getValue()) + 1;
+    } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+      // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      if (CI2->getValue().ult(Width)) {
+        Lower = IntMin.ashr(CI2->getValue());
+        Upper = IntMax.ashr(CI2->getValue()) + 1;
+      }
+    } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+      // 'or x, CI2' produces [CI2, UINT_MAX].
+      Lower = CI2->getValue();
+    } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+      // 'and x, CI2' produces [0, CI2].
+      Upper = CI2->getValue() + 1;
+    }
+    if (Lower != Upper) {
+      ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+      if (RHS_CR.contains(LHS_CR))
+        return ConstantInt::getTrue(RHS->getContext());
+      if (RHS_CR.inverse().contains(LHS_CR))
+        return ConstantInt::getFalse(RHS->getContext());
     }
   }
 
@@ -1644,6 +1777,93 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }
 
+  if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+    bool KnownNonNegative, KnownNegative;
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getFalse(RHS->getContext());
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE:
+      return ConstantInt::getTrue(RHS->getContext());
+    }
+  }
+  if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+    bool KnownNonNegative, KnownNegative;
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getTrue(RHS->getContext());
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE:
+      return ConstantInt::getFalse(RHS->getContext());
+    }
+  }
+
+  if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+      LBO->getOperand(1) == RBO->getOperand(1)) {
+    switch (LBO->getOpcode()) {
+    default: break;
+    case Instruction::UDiv:
+    case Instruction::LShr:
+      if (ICmpInst::isSigned(Pred))
+        break;
+      // fall-through
+    case Instruction::SDiv:
+    case Instruction::AShr:
+      if (!LBO->isExact() && !RBO->isExact())
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    case Instruction::Shl: {
+      bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap();
+      bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+      if (!NUW && !NSW)
+        break;
+      if (!NSW && ICmpInst::isSigned(Pred))
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    }
+    }
+  }
+
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -1879,6 +2099,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
   case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
   case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
   case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
   case Instruction::Shl:
     return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
                            TD, DT, MaxRecurse);
@@ -1973,6 +2196,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
   case Instruction::FDiv:
     Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
     break;
+  case Instruction::SRem:
+    Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::URem:
+    Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::FRem:
+    Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
   case Instruction::Shl:
     Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->hasNoSignedWrap(),
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 9e7da6c..d5f0b5c 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -29,7 +29,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include <map>
-#include <set>
 #include <stack>
 using namespace llvm;
 
@@ -268,6 +267,8 @@ public:
 } // end anonymous namespace.
 
 namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val)
+    LLVM_ATTRIBUTE_USED;
 raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
   if (Val.isUndefined())
     return OS << "undefined";
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index fc7edc0..f130f30 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -606,7 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                                     Type::getInt64Ty(V->getContext())))
         return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
-      const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+      ArrayRef<unsigned> Indices = CE->getIndices();
       if (Value *W = FindInsertedValue(CE->getOperand(0),
                                        Indices.begin(),
                                        Indices.end()))
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
deleted file mode 100644
index a0e6034..0000000
--- a/lib/Analysis/LiveValues.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the implementation for the LLVM IR Value liveness
-// analysis pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LiveValues.h"
-#include "llvm/Instructions.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-using namespace llvm;
-
-namespace llvm {
-  FunctionPass *createLiveValuesPass() { return new LiveValues(); }
-}
-
-char LiveValues::ID = 0;
-INITIALIZE_PASS_BEGIN(LiveValues, "live-values",
-                "Value Liveness Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LiveValues, "live-values",
-                "Value Liveness Analysis", false, true)
-
-LiveValues::LiveValues() : FunctionPass(ID) {
-  initializeLiveValuesPass(*PassRegistry::getPassRegistry());
-}
-
-void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<DominatorTree>();
-  AU.addRequired<LoopInfo>();
-  AU.setPreservesAll();
-}
-
-bool LiveValues::runOnFunction(Function &F) {
-  DT = &getAnalysis<DominatorTree>();
-  LI = &getAnalysis<LoopInfo>();
-
-  // This pass' values are computed lazily, so there's nothing to do here.
-
-  return false;
-}
-
-void LiveValues::releaseMemory() {
-  Memos.clear();
-}
-
-/// isUsedInBlock - Test if the given value is used in the given block.
-///
-bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) {
-  Memo &M = getMemo(V);
-  return M.Used.count(BB);
-}
-
-/// isLiveThroughBlock - Test if the given value is known to be
-/// live-through the given block, meaning that the block is properly
-/// dominated by the value's definition, and there exists a block
-/// reachable from it that contains a use. This uses a conservative
-/// approximation that errs on the side of returning false.
-///
-bool LiveValues::isLiveThroughBlock(const Value *V,
-                                    const BasicBlock *BB) {
-  Memo &M = getMemo(V);
-  return M.LiveThrough.count(BB);
-}
-
-/// isKilledInBlock - Test if the given value is known to be killed in
-/// the given block, meaning that the block contains a use of the value,
-/// and no blocks reachable from the block contain a use. This uses a
-/// conservative approximation that errs on the side of returning false.
-///
-bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) {
-  Memo &M = getMemo(V);
-  return M.Killed.count(BB);
-}
-
-/// getMemo - Retrieve an existing Memo for the given value if one
-/// is available, otherwise compute a new one.
-///
-LiveValues::Memo &LiveValues::getMemo(const Value *V) {
-  DenseMap<const Value *, Memo>::iterator I = Memos.find(V);
-  if (I != Memos.end())
-    return I->second;
-  return compute(V);
-}
-
-/// getImmediateDominator - A handy utility for the specific DominatorTree
-/// query that we need here.
-///
-static const BasicBlock *getImmediateDominator(const BasicBlock *BB,
-                                               const DominatorTree *DT) {
-  DomTreeNode *Node = DT->getNode(const_cast<BasicBlock *>(BB))->getIDom();
-  return Node ? Node->getBlock() : 0;
-}
-
-/// compute - Compute a new Memo for the given value.
-///
-LiveValues::Memo &LiveValues::compute(const Value *V) {
-  Memo &M = Memos[V];
-
-  // Determine the block containing the definition.
-  const BasicBlock *DefBB;
-  // Instructions define values with meaningful live ranges.
-  if (const Instruction *I = dyn_cast<Instruction>(V))
-    DefBB = I->getParent();
-  // Arguments can be analyzed as values defined in the entry block.
-  else if (const Argument *A = dyn_cast<Argument>(V))
-    DefBB = &A->getParent()->getEntryBlock();
-  // Constants and other things aren't meaningful here, so just
-  // return having computed an empty Memo so that we don't come
-  // here again. The assumption here is that client code won't
-  // be asking about such values very often.
-  else
-    return M;
-
-  // Determine if the value is defined inside a loop. This is used
-  // to track whether the value is ever used outside the loop, so
-  // it'll be set to null if the value is either not defined in a
-  // loop or used outside the loop in which it is defined.
-  const Loop *L = LI->getLoopFor(DefBB);
-
-  // Track whether the value is used anywhere outside of the block
-  // in which it is defined.
-  bool LiveOutOfDefBB = false;
-
-  // Examine each use of the value.
-  for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
-       I != E; ++I) {
-    const User *U = *I;
-    const BasicBlock *UseBB = cast<Instruction>(U)->getParent();
-
-    // Note the block in which this use occurs.
-    M.Used.insert(UseBB);
-
-    // If the use block doesn't have successors, the value can be
-    // considered killed.
-    if (succ_begin(UseBB) == succ_end(UseBB))
-      M.Killed.insert(UseBB);
-
-    // Observe whether the value is used outside of the loop in which
-    // it is defined. Switch to an enclosing loop if necessary.
-    for (; L; L = L->getParentLoop())
-      if (L->contains(UseBB))
-        break;
-
-    // Search for live-through blocks.
-    const BasicBlock *BB;
-    if (const PHINode *PHI = dyn_cast<PHINode>(U)) {
-      // For PHI nodes, start the search at the incoming block paired with the
-      // incoming value, which must be dominated by the definition.
-      unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo());
-      BB = PHI->getIncomingBlock(Num);
-
-      // A PHI-node use means the value is live-out of it's defining block
-      // even if that block also contains the only use.
-      LiveOutOfDefBB = true;
-    } else {
-      // Otherwise just start the search at the use.
-      BB = UseBB;
-
-      // Note if the use is outside the defining block.
-      LiveOutOfDefBB |= UseBB != DefBB;
-    }
-
-    // Climb the immediate dominator tree from the use to the definition
-    // and mark all intermediate blocks as live-through.
-    for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) {
-      if (BB != UseBB && !M.LiveThrough.insert(BB))
-        break;
-    }
-  }
-
-  // If the value is defined inside a loop and is not live outside
-  // the loop, then each exit block of the loop in which the value
-  // is used is a kill block.
-  if (L) {
-    SmallVector<BasicBlock *, 4> ExitingBlocks;
-    L->getExitingBlocks(ExitingBlocks);
-    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-      const BasicBlock *ExitingBlock = ExitingBlocks[i];
-      if (M.Used.count(ExitingBlock))
-        M.Killed.insert(ExitingBlock);
-    }
-  }
-
-  // If the value was never used outside the block in which it was
-  // defined, it's killed in that block.
-  if (!LiveOutOfDefBB)
-    M.Killed.insert(DefBB);
-
-  return M;
-}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 2ea27fb..ab34fd6 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -17,6 +17,7 @@
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 using namespace llvm;
 
 /// AreEquivalentAddressValues - Test if A and B will obviously have the same
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 8e1a7bf..10e3f29 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -14,8 +14,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/DebugInfoProbe.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Timer.h"
 using namespace llvm;
 
@@ -52,6 +54,20 @@ char PrintLoopPass::ID = 0;
 }
 
 //===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+  if (TheDebugProbe) return;
+      
+  // Constructed the first time this is called. This guarantees that the 
+  // object will be constructed, if -enable-debug-info-probe is set, 
+  // before static globals, thus it will be destroyed before them.
+  static ManagedStatic<DebugInfoProbeInfo> DIP;
+  TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
 // LPPassManager
 //
 
@@ -223,6 +239,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
 bool LPPassManager::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfo>();
   bool Changed = false;
+  createDebugInfoProbe();
 
   // Collect inherited analysis from Module level pass manager.
   populateInheritedAnalysis(TPM->activeStack);
@@ -254,19 +271,21 @@ bool LPPassManager::runOnFunction(Function &F) {
     // Run all passes on the current Loop.
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
       LoopPass *P = getContainedPass(Index);
-
       dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
                    CurrentLoop->getHeader()->getName());
       dumpRequiredSet(P);
 
       initializeAnalysisImpl(P);
-
+      if (TheDebugProbe)
+        TheDebugProbe->initialize(P, F);
       {
         PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
         TimeRegion PassTimer(getPassTimer(P));
 
         Changed |= P->runOnLoop(CurrentLoop, *this);
       }
+      if (TheDebugProbe)
+        TheDebugProbe->finalize(P, F);
 
       if (Changed)
         dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 1ab18ca..769c68c 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -35,7 +35,13 @@ static bool isMallocCall(const CallInst *CI) {
     return false;
 
   Function *Callee = CI->getCalledFunction();
-  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
+  if (Callee == 0 || !Callee->isDeclaration())
+    return false;
+  if (Callee->getName() != "malloc" &&
+      Callee->getName() != "_Znwj" && // operator new(unsigned int)
+      Callee->getName() != "_Znwm" && // operator new(unsigned long)
+      Callee->getName() != "_Znaj" && // operator new[](unsigned int)
+      Callee->getName() != "_Znam")   // operator new[](unsigned long)
     return false;
 
   // Check malloc prototype.
@@ -189,7 +195,12 @@ const CallInst *llvm::isFreeCall(const Value *I) {
   if (!CI)
     return 0;
   Function *Callee = CI->getCalledFunction();
-  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free")
+  if (Callee == 0 || !Callee->isDeclaration())
+    return 0;
+
+  if (Callee->getName() != "free" &&
+      Callee->getName() != "_ZdlPv" && // operator delete(void*)
+      Callee->getName() != "_ZdaPv")   // operator delete[](void*)
     return 0;
 
   // Check free prototype.
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 35043bd..ce7fab6 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -16,6 +16,7 @@
 
 #define DEBUG_TYPE "memdep"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Function.h"
@@ -221,6 +222,96 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
   return MemDepResult::getClobber(ScanIt);
 }
 
+/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
+/// would fully overlap MemLoc if done as a wider legal integer load.
+///
+/// MemLocBase, MemLocOffset are lazily computed here the first time the
+/// base/offs of memloc is needed.
+static bool 
+isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
+                                       const Value *&MemLocBase,
+                                       int64_t &MemLocOffs,
+                                       const LoadInst *LI,
+                                       const TargetData *TD) {
+  // If we have no target data, we can't do this.
+  if (TD == 0) return false;
+
+  // If we haven't already computed the base/offset of MemLoc, do so now.
+  if (MemLocBase == 0)
+    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+
+  unsigned Size = MemoryDependenceAnalysis::
+    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+                                    LI, *TD);
+  return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load.  If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use.  If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+                                unsigned MemLocSize, const LoadInst *LI,
+                                const TargetData &TD) {
+  // We can only extend non-volatile integer loads.
+  if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+  
+  // Get the base of this load.
+  int64_t LIOffs = 0;
+  const Value *LIBase = 
+    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
+  
+  // If the two pointers are not based on the same pointer, we can't tell that
+  // they are related.
+  if (LIBase != MemLocBase) return 0;
+  
+  // Okay, the two values are based on the same pointer, but returned as
+  // no-alias.  This happens when we have things like two byte loads at "P+1"
+  // and "P+3".  Check to see if increasing the size of the "LI" load up to its
+  // alignment (or the largest native integer type) will allow us to load all
+  // the bits required by MemLoc.
+  
+  // If MemLoc is before LI, then no widening of LI will help us out.
+  if (MemLocOffs < LIOffs) return 0;
+  
+  // Get the alignment of the load in bytes.  We assume that it is safe to load
+  // any legal integer up to this size without a problem.  For example, if we're
+  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
+  // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
+  // to i16.
+  unsigned LoadAlign = LI->getAlignment();
+
+  int64_t MemLocEnd = MemLocOffs+MemLocSize;
+  
+  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
+  if (LIOffs+LoadAlign < MemLocEnd) return 0;
+  
+  // This is the size of the load to try.  Start with the next larger power of
+  // two.
+  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
+  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
+  
+  while (1) {
+    // If this load size is bigger than our known alignment or would not fit
+    // into a native integer register, then we fail.
+    if (NewLoadByteSize > LoadAlign ||
+        !TD.fitsInLegalInteger(NewLoadByteSize*8))
+      return 0;
+
+    // If a load of this width would include all of MemLoc, then we succeed.
+    if (LIOffs+NewLoadByteSize >= MemLocEnd)
+      return NewLoadByteSize;
+    
+    NewLoadByteSize <<= 1;
+  }
+  
+  return 0;
+}
+
 /// getPointerDependencyFrom - Return the instruction on which a memory
 /// location depends.  If isLoad is true, this routine ignores may-aliases with
 /// read-only operations.  If isLoad is false, this routine ignores may-aliases
@@ -229,58 +320,31 @@ MemDepResult MemoryDependenceAnalysis::
 getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, 
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
 
-  Value *InvariantTag = 0;
-
+  const Value *MemLocBase = 0;
+  int64_t MemLocOffset = 0;
+  
   // Walk backwards through the basic block, looking for dependencies.
   while (ScanIt != BB->begin()) {
     Instruction *Inst = --ScanIt;
 
-    // If we're in an invariant region, no dependencies can be found before
-    // we pass an invariant-begin marker.
-    if (InvariantTag == Inst) {
-      InvariantTag = 0;
-      continue;
-    }
-    
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       // Debug intrinsics don't (and can't) cause dependences.
       if (isa<DbgInfoIntrinsic>(II)) continue;
       
-      // If we pass an invariant-end marker, then we've just entered an
-      // invariant region and can start ignoring dependencies.
-      if (II->getIntrinsicID() == Intrinsic::invariant_end) {
-        // FIXME: This only considers queries directly on the invariant-tagged
-        // pointer, not on query pointers that are indexed off of them.  It'd
-        // be nice to handle that at some point.
-        AliasAnalysis::AliasResult R =
-          AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc);
-        if (R == AliasAnalysis::MustAlias)
-          InvariantTag = II->getArgOperand(0);
-
-        continue;
-      }
-
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
       if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them.  It'd
-        // be nice to handle that at some point.
-        AliasAnalysis::AliasResult R =
-          AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc);
-        if (R == AliasAnalysis::MustAlias)
+        // be nice to handle that at some point (the right approach is to use
+        // GetPointerBaseWithConstantOffset).
+        if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
+                            MemLoc))
           return MemDepResult::getDef(II);
         continue;
       }
     }
 
-    // If we're querying on a load and we're in an invariant region, we're done
-    // at this point. Nothing a load depends on can live in an invariant region.
-    //
-    // FIXME: this will prevent us from returning load/load must-aliases, so GVN
-    // won't remove redundant loads.
-    if (isLoad && InvariantTag) continue;
-
     // Values depend on loads if the pointers are must aliased.  This means that
     // a load depends on another must aliased load from the same value.
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
@@ -288,27 +352,51 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       
       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
-      if (R == AliasAnalysis::NoAlias)
-        continue;
       
-      // May-alias loads don't depend on each other without a dependence.
-      if (isLoad && R != AliasAnalysis::MustAlias)
+      if (isLoad) {
+        if (R == AliasAnalysis::NoAlias) {
+          // If this is an over-aligned integer load (for example,
+          // "load i8* %P, align 4") see if it would obviously overlap with the
+          // queried location if widened to a larger load (e.g. if the queried
+          // location is 1 byte at P+1).  If so, return it as a load/load
+          // clobber result, allowing the client to decide to widen the load if
+          // it wants to.
+          if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
+            if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+                isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
+                                                       MemLocOffset, LI, TD))
+              return MemDepResult::getClobber(Inst);
+          
+          continue;
+        }
+        
+        // Must aliased loads are defs of each other.
+        if (R == AliasAnalysis::MustAlias)
+          return MemDepResult::getDef(Inst);
+
+        // If we have a partial alias, then return this as a clobber for the
+        // client to handle.
+        if (R == AliasAnalysis::PartialAlias)
+          return MemDepResult::getClobber(Inst);
+        
+        // Random may-alias loads don't depend on each other without a
+        // dependence.
+        continue;
+      }
+
+      // Stores don't depend on other no-aliased accesses.
+      if (R == AliasAnalysis::NoAlias)
         continue;
 
       // Stores don't alias loads from read-only memory.
-      if (!isLoad && AA->pointsToConstantMemory(LoadLoc))
+      if (AA->pointsToConstantMemory(LoadLoc))
         continue;
 
-      // Stores depend on may and must aliased loads, loads depend on must-alias
-      // loads.
+      // Stores depend on may/must aliased loads.
       return MemDepResult::getDef(Inst);
     }
     
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-      // There can't be stores to the value we care about inside an 
-      // invariant region.
-      if (InvariantTag) continue;
-      
       // If alias analysis can tell that this store is guaranteed to not modify
       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
       // the query pointer points to constant memory etc.
@@ -341,8 +429,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
         (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
       const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
       
-      if (AccessPtr == Inst ||
-          AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+      if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
         return MemDepResult::getDef(Inst);
       continue;
     }
@@ -353,9 +440,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
       // If the call has no effect on the queried pointer, just ignore it.
       continue;
     case AliasAnalysis::Mod:
-      // If we're in an invariant region, we can ignore calls that ONLY
-      // modify the pointer.
-      if (InvariantTag) continue;
       return MemDepResult::getClobber(Inst);
     case AliasAnalysis::Ref:
       // If the call is known to never store to the pointer, and if this is a
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 93da5a4..70dcd0d 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index 5d3f6bb..7c584da 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -38,13 +38,10 @@
 #include "llvm/Support/TypeBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <map>
 #include <queue>
-#include <set>
 #include <stack>
 #include <string>
 #include <utility>
-#include <vector>
 #include <sstream>
 
 using namespace llvm;
@@ -286,7 +283,7 @@ void BallLarusDag::calculatePathNumbers() {
       BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
       exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
 
-      // Counters to handle the possibilty of a multi-graph
+      // Counters to handle the possibility of a multi-graph
       BasicBlock* oldTarget = 0;
       unsigned duplicateNumber = 0;
 
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index c549773..0ae734e 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -124,7 +124,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
       ProfilePathEdgeVector* pev = currentPath->getPathEdges();
       DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
             << currentPath->getCount() << "\n");
-      // setup the entry edge (normally path profiling doens't care about this)
+      // setup the entry edge (normally path profiling doesn't care about this)
       if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
         edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
           += currentPath->getCount();
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 3f0deab..6ed2729 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 char PostDominatorTree::ID = 0;
-char PostDominanceFrontier::ID = 0;
 INITIALIZE_PASS(PostDominatorTree, "postdomtree",
                 "Post-Dominator Tree Construction", true, true)
 
@@ -50,53 +49,3 @@ FunctionPass* llvm::createPostDomTree() {
   return new PostDominatorTree();
 }
 
-//===----------------------------------------------------------------------===//
-//  PostDominanceFrontier Implementation
-//===----------------------------------------------------------------------===//
-
-INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier",
-                "Post-Dominance Frontier Construction", true, true)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier",
-                "Post-Dominance Frontier Construction", true, true)
-
-const DominanceFrontier::DomSetType &
-PostDominanceFrontier::calculate(const PostDominatorTree &DT,
-                                 const DomTreeNode *Node) {
-  // Loop over CFG successors to calculate DFlocal[Node]
-  BasicBlock *BB = Node->getBlock();
-  DomSetType &S = Frontiers[BB];       // The new set to fill in...
-  if (getRoots().empty()) return S;
-
-  if (BB)
-    for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB);
-         SI != SE; ++SI) {
-      BasicBlock *P = *SI;
-      // Does Node immediately dominate this predecessor?
-      DomTreeNode *SINode = DT[P];
-      if (SINode && SINode->getIDom() != Node)
-        S.insert(P);
-    }
-
-  // At this point, S is DFlocal.  Now we union in DFup's of our children...
-  // Loop through and visit the nodes that Node immediately dominates (Node's
-  // children in the IDomTree)
-  //
-  for (DomTreeNode::const_iterator
-         NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) {
-    DomTreeNode *IDominee = *NI;
-    const DomSetType &ChildDF = calculate(DT, IDominee);
-
-    DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end();
-    for (; CDFI != CDFE; ++CDFI) {
-      if (!DT.properlyDominates(Node, DT[*CDFI]))
-        S.insert(*CDFI);
-    }
-  }
-
-  return S;
-}
-
-FunctionPass* llvm::createPostDomFrontier() {
-  return new PostDominanceFrontier();
-}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index 667ee1c..b594e2b 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -140,7 +140,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
     // loop, thus the edge is a backedge, continue and do not check if the
     // value is valid.
     if (BBisHeader && BBLoop->contains(*bbi)) {
-      printEdgeError(edge, "but is backedge, continueing");
+      printEdgeError(edge, "but is backedge, continuing");
       continue;
     }
     // If the edges value is missing (and this is no loop header, and this is
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 36f211e..173de2c 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -309,9 +309,9 @@ void ProfileInfoT<Function,BasicBlock>::
   removeEdge(oldedge);
 }
 
-/// Replaces all occurences of RmBB in the ProfilingInfo with DestBB.
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
 /// This checks all edges of the function the blocks reside in and replaces the
-/// occurences of RmBB with DestBB.
+/// occurrences of RmBB with DestBB.
 template<>
 void ProfileInfoT<Function,BasicBlock>::
         replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
@@ -812,7 +812,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
       }
       if (iw < 0) continue;
 
-      // Check the recieving end of the path if it can handle the flow.
+      // Check the receiving end of the path if it can handle the flow.
       double ow = getExecutionCount(Dest);
       Processed.clear();
       for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 25481b2..eaa38da 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
 #include <cstdlib>
-#include <map>
 using namespace llvm;
 
 // ByteSwap - Byteswap 'Var' if 'Really' is true.
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index e2f6a8b..52753cb 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -41,16 +41,15 @@ VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
 STATISTIC(numRegions,       "The # of regions");
 STATISTIC(numSimpleRegions, "The # of simple regions");
 
-//===----------------------------------------------------------------------===//
-/// PrintStyle - Print region in difference ways.
-enum PrintStyle { PrintNone, PrintBB, PrintRN  };
-
-static cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
+static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+  cl::Hidden,
   cl::desc("style of printing regions"),
   cl::values(
-    clEnumValN(PrintNone, "none",  "print no details"),
-    clEnumValN(PrintBB, "bb",  "print regions in detail with block_iterator"),
-    clEnumValN(PrintRN, "rn",  "print regions in detail with element_iterator"),
+    clEnumValN(Region::PrintNone, "none",  "print no details"),
+    clEnumValN(Region::PrintBB, "bb",
+               "print regions in detail with block_iterator"),
+    clEnumValN(Region::PrintRN, "rn",
+               "print regions in detail with element_iterator"),
     clEnumValEnd));
 //===----------------------------------------------------------------------===//
 /// Region Implementation
@@ -413,7 +412,8 @@ Region *Region::getExpandedRegion() const {
   return new Region(getEntry(), R->getExit(), RI, DT);
 }
 
-void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
+                   enum PrintStyle Style) const {
   if (print_tree)
     OS.indent(level*2) << "[" << level << "] " << getNameStr();
   else
@@ -422,14 +422,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
   OS << "\n";
 
 
-  if (printStyle != PrintNone) {
+  if (Style != PrintNone) {
     OS.indent(level*2) << "{\n";
     OS.indent(level*2 + 2);
 
-    if (printStyle == PrintBB) {
+    if (Style == PrintBB) {
       for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
         OS << **I << ", "; // TODO: remove the last ","
-    } else if (printStyle == PrintRN) {
+    } else if (Style == PrintRN) {
       for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
         OS << **I << ", "; // TODO: remove the last ",
     }
@@ -439,14 +439,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
 
   if (print_tree)
     for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
-      (*RI)->print(OS, print_tree, level+1);
+      (*RI)->print(OS, print_tree, level+1, Style);
 
-  if (printStyle != PrintNone)
+  if (Style != PrintNone)
     OS.indent(level*2) << "} \n";
 }
 
 void Region::dump() const {
-  print(dbgs(), true, getDepth());
+  print(dbgs(), true, getDepth(), printStyle.getValue());
 }
 
 void Region::clearNodeCache() {
@@ -714,7 +714,7 @@ void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
 
 void RegionInfo::print(raw_ostream &OS, const Module *) const {
   OS << "Region tree:\n";
-  TopLevelRegion->print(OS, true, 0);
+  TopLevelRegion->print(OS, true, 0, printStyle.getValue());
   OS << "End region tree\n";
 }
 
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 0cf0f90..a1730b0 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -70,6 +70,32 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
                                                      G->getTopLevelRegion());
   }
 
+  std::string getEdgeAttributes(RegionNode *srcNode,
+    GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
+
+    RegionNode *destNode = *CI;
+
+    if (srcNode->isSubRegion() || destNode->isSubRegion())
+      return "";
+
+    // In case of a backedge, do not use it to define the layout of the nodes.
+    BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
+    BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
+
+    Region *R = RI->getRegionFor(destBB);
+
+    while (R && R->getParent())
+      if (R->getParent()->getEntry() == destBB)
+        R = R->getParent();
+      else
+        break;
+
+    if (R->getEntry() == destBB && R->contains(srcBB))
+      return "constraint=false";
+
+    return "";
+  }
+
   // Print the cluster of the subregions. This groups the single basic blocks
   // and adds a different background color for each group.
   static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 62244cc..bab4619 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -157,10 +157,13 @@ void SCEV::print(raw_ostream &OS) const {
     for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
       OS << ",+," << *AR->getOperand(i);
     OS << "}<";
-    if (AR->hasNoUnsignedWrap())
+    if (AR->getNoWrapFlags(FlagNUW))
       OS << "nuw><";
-    if (AR->hasNoSignedWrap())
+    if (AR->getNoWrapFlags(FlagNSW))
       OS << "nsw><";
+    if (AR->getNoWrapFlags(FlagNW) &&
+        !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
+      OS << "nw><";
     WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
     OS << ">";
     return;
@@ -203,7 +206,7 @@ void SCEV::print(raw_ostream &OS) const {
       OS << "alignof(" << *AllocTy << ")";
       return;
     }
-  
+
     const Type *CTy;
     Constant *FieldNo;
     if (U->isOffsetOf(CTy, FieldNo)) {
@@ -212,7 +215,7 @@ void SCEV::print(raw_ostream &OS) const {
       OS << ")";
       return;
     }
-  
+
     // Otherwise just print it normally.
     WriteAsOperand(OS, U->getValue(), false);
     return;
@@ -830,7 +833,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
       Operands.push_back(S);
     }
     if (!hasTrunc)
-      return getAddExpr(Operands, false, false);
+      return getAddExpr(Operands);
     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
   }
 
@@ -845,7 +848,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
       Operands.push_back(S);
     }
     if (!hasTrunc)
-      return getMulExpr(Operands, false, false);
+      return getMulExpr(Operands);
     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
   }
 
@@ -854,7 +857,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     SmallVector<const SCEV *, 4> Operands;
     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
       Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
-    return getAddRecExpr(Operands, AddRec->getLoop());
+    return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
   }
 
   // As a special case, fold trunc(undef) to undef. We don't want to
@@ -926,10 +929,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
 
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
-      if (AR->hasNoUnsignedWrap())
+      if (AR->getNoWrapFlags(SCEV::FlagNUW))
         return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                              getZeroExtendExpr(Step, Ty),
-                             L);
+                             L, AR->getNoWrapFlags());
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -959,12 +962,14 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
             getAddExpr(getZeroExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getZeroExtendExpr(Step, WideTy)));
-          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+            // Cache knowledge of AR NUW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getZeroExtendExpr(Step, Ty),
-                                 L);
-
+                                 L, AR->getNoWrapFlags());
+          }
           // Similar to above, only this time treat the step value as signed.
           // This covers loops that count down.
           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
@@ -973,11 +978,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
             getAddExpr(getZeroExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getSignExtendExpr(Step, WideTy)));
-          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+            // Cache knowledge of AR NW, which is propagated to this AddRec.
+            // Negative step causes unsigned wrap, but it still can't self-wrap.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         }
 
         // If the backedge is guarded by a comparison with the pre-inc value
@@ -990,22 +999,29 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
-                                           AR->getPostIncExpr(*this), N)))
+                                           AR->getPostIncExpr(*this), N))) {
+            // Cache knowledge of AR NUW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getZeroExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         } else if (isKnownNegative(Step)) {
           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                       getSignedRange(Step).getSignedMin());
           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
-                                           AR->getPostIncExpr(*this), N)))
+                                           AR->getPostIncExpr(*this), N))) {
+            // Cache knowledge of AR NW, which is propagated to this AddRec.
+            // Negative step causes unsigned wrap, but it still can't self-wrap.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         }
       }
     }
@@ -1080,10 +1096,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
 
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
-      if (AR->hasNoSignedWrap())
+      if (AR->getNoWrapFlags(SCEV::FlagNSW))
         return getAddRecExpr(getSignExtendExpr(Start, Ty),
                              getSignExtendExpr(Step, Ty),
-                             L);
+                             L, SCEV::FlagNSW);
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -1113,12 +1129,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
             getAddExpr(getSignExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getSignExtendExpr(Step, WideTy)));
-          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+            // Cache knowledge of AR NSW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 L);
-
+                                 L, AR->getNoWrapFlags());
+          }
           // Similar to above, only this time treat the step value as unsigned.
           // This covers loops that count up with an unsigned step.
           const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
@@ -1127,11 +1145,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
             getAddExpr(getSignExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getZeroExtendExpr(Step, WideTy)));
-          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+            // Cache knowledge of AR NSW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                  getZeroExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         }
 
         // If the backedge is guarded by a comparison with the pre-inc value
@@ -1144,22 +1165,28 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
-                                           AR->getPostIncExpr(*this), N)))
+                                           AR->getPostIncExpr(*this), N))) {
+            // Cache knowledge of AR NSW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         } else if (isKnownNegative(Step)) {
           const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
                                       getSignedRange(Step).getSignedMin());
           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
-                                           AR->getPostIncExpr(*this), N)))
+                                           AR->getPostIncExpr(*this), N))) {
+            // Cache knowledge of AR NSW, which is propagated to this AddRec.
+            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getSignExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 L);
+                                 L, AR->getNoWrapFlags());
+          }
         }
       }
     }
@@ -1213,7 +1240,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
     for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
          I != E; ++I)
       Ops.push_back(getAnyExtendExpr(*I, Ty));
-    return getAddRecExpr(Ops, AR->getLoop());
+    return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
   }
 
   // As a special case, fold anyext(undef) to undef. We don't want to
@@ -1334,7 +1361,9 @@ namespace {
 /// getAddExpr - Get a canonical add expression, or something simpler if
 /// possible.
 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                                        bool HasNUW, bool HasNSW) {
+                                        SCEV::NoWrapFlags Flags) {
+  assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
+         "only nuw or nsw allowed");
   assert(!Ops.empty() && "Cannot get empty add!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
@@ -1344,8 +1373,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVAddExpr operand types don't match!");
 #endif
 
-  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
-  if (!HasNUW && HasNSW) {
+  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+  // And vice-versa.
+  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+  SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+  if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
     bool All = true;
     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
          E = Ops.end(); I != E; ++I)
@@ -1353,7 +1385,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
         All = false;
         break;
       }
-    if (All) HasNUW = true;
+    if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   }
 
   // Sort by complexity, this groups all similar expression types together.
@@ -1404,7 +1436,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
       FoundMatch = true;
     }
   if (FoundMatch)
-    return getAddExpr(Ops, HasNUW, HasNSW);
+    return getAddExpr(Ops, Flags);
 
   // Check for truncates. If all the operands are truncated from the same
   // type, see if factoring out the truncate would permit the result to be
@@ -1454,7 +1486,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     }
     if (Ok) {
       // Evaluate the expression in the larger type.
-      const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW);
+      const SCEV *Fold = getAddExpr(LargeOps, Flags);
       // If it folds to something simple, use it. Otherwise, don't.
       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
         return getTruncateExpr(Fold, DstType);
@@ -1625,9 +1657,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 
       // Build the new addrec. Propagate the NUW and NSW flags if both the
       // outer add and the inner addrec are guaranteed to have no overflow.
-      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop,
-                                         HasNUW && AddRec->hasNoUnsignedWrap(),
-                                         HasNSW && AddRec->hasNoSignedWrap());
+      // Always propagate NW.
+      Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
 
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
@@ -1668,7 +1700,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
               }
               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
             }
-        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop);
+        // Step size has changed, so we cannot guarantee no self-wraparound.
+        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
         return getAddExpr(Ops);
       }
 
@@ -1692,15 +1725,16 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
   }
-  if (HasNUW) S->setHasNoUnsignedWrap(true);
-  if (HasNSW) S->setHasNoSignedWrap(true);
+  S->setNoWrapFlags(Flags);
   return S;
 }
 
 /// getMulExpr - Get a canonical multiply expression, or something simpler if
 /// possible.
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
-                                        bool HasNUW, bool HasNSW) {
+                                        SCEV::NoWrapFlags Flags) {
+  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
+         "only nuw or nsw allowed");
   assert(!Ops.empty() && "Cannot get empty mul!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
@@ -1710,8 +1744,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVMulExpr operand types don't match!");
 #endif
 
-  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
-  if (!HasNUW && HasNSW) {
+  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+  // And vice-versa.
+  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+  SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+  if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
     bool All = true;
     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
          E = Ops.end(); I != E; ++I)
@@ -1719,7 +1756,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
         All = false;
         break;
       }
-    if (All) HasNUW = true;
+    if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   }
 
   // Sort by complexity, this groups all similar expression types together.
@@ -1759,12 +1796,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     } else if (Ops[0]->isAllOnesValue()) {
       // If we have a mul by -1 of an add, try distributing the -1 among the
       // add operands.
-      if (Ops.size() == 2)
+      if (Ops.size() == 2) {
         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
           SmallVector<const SCEV *, 4> NewOps;
           bool AnyFolded = false;
-          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-               I != E; ++I) {
+          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
+                 E = Add->op_end(); I != E; ++I) {
             const SCEV *Mul = getMulExpr(Ops[0], *I);
             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
             NewOps.push_back(Mul);
@@ -1772,6 +1809,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
           if (AnyFolded)
             return getAddExpr(NewOps);
         }
+        else if (const SCEVAddRecExpr *
+                 AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+          // Negation preserves a recurrence's no self-wrap property.
+          SmallVector<const SCEV *, 4> Operands;
+          for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+                 E = AddRec->op_end(); I != E; ++I) {
+            Operands.push_back(getMulExpr(Ops[0], *I));
+          }
+          return getAddRecExpr(Operands, AddRec->getLoop(),
+                               AddRec->getNoWrapFlags(SCEV::FlagNW));
+        }
+      }
     }
 
     if (Ops.size() == 1)
@@ -1831,9 +1880,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 
       // Build the new addrec. Propagate the NUW and NSW flags if both the
       // outer mul and the inner addrec are guaranteed to have no overflow.
-      const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop,
-                                         HasNUW && AddRec->hasNoUnsignedWrap(),
-                                         HasNSW && AddRec->hasNoSignedWrap());
+      //
+      // No self-wrap cannot be guaranteed after changing the step size, but
+      // will be inferred if either NUW or NSW is true.
+      Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
+      const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
 
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
@@ -1869,7 +1920,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                                getMulExpr(G, B),
                                                getMulExpr(B, D));
               const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
-                                                    F->getLoop());
+                                                    F->getLoop(),
+                                                    SCEV::FlagAnyWrap);
               if (Ops.size() == 2) return NewAddRec;
               Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
@@ -1897,8 +1949,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         O, Ops.size());
     UniqueSCEVs.InsertNode(S, IP);
   }
-  if (HasNUW) S->setHasNoUnsignedWrap(true);
-  if (HasNSW) S->setHasNoSignedWrap(true);
+  S->setNoWrapFlags(Flags);
   return S;
 }
 
@@ -1938,11 +1989,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
               getZeroExtendExpr(AR, ExtTy) ==
               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                             getZeroExtendExpr(Step, ExtTy),
-                            AR->getLoop())) {
+                            AR->getLoop(), SCEV::FlagAnyWrap)) {
             SmallVector<const SCEV *, 4> Operands;
             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
-            return getAddRecExpr(Operands, AR->getLoop());
+            return getAddRecExpr(Operands, AR->getLoop(),
+                                 SCEV::FlagNW);
           }
       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
@@ -1963,7 +2015,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
           }
       }
       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
-      if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
+      if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
         SmallVector<const SCEV *, 4> Operands;
         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
@@ -2006,27 +2058,26 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
-const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
-                                           const SCEV *Step, const Loop *L,
-                                           bool HasNUW, bool HasNSW) {
+const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
+                                           const Loop *L,
+                                           SCEV::NoWrapFlags Flags) {
   SmallVector<const SCEV *, 4> Operands;
   Operands.push_back(Start);
   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
     if (StepChrec->getLoop() == L) {
       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
-      return getAddRecExpr(Operands, L);
+      return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
     }
 
   Operands.push_back(Step);
-  return getAddRecExpr(Operands, L, HasNUW, HasNSW);
+  return getAddRecExpr(Operands, L, Flags);
 }
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
 const SCEV *
 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
-                               const Loop *L,
-                               bool HasNUW, bool HasNSW) {
+                               const Loop *L, SCEV::NoWrapFlags Flags) {
   if (Operands.size() == 1) return Operands[0];
 #ifndef NDEBUG
   const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
@@ -2040,7 +2091,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
 
   if (Operands.back()->isZero()) {
     Operands.pop_back();
-    return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0}  -->  X
+    return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
   }
 
   // It's tempting to want to call getMaxBackedgeTakenCount count here and
@@ -2049,8 +2100,11 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   // meaningful BE count at this point (and if we don't, we'd be stuck
   // with a SCEVCouldNotCompute as the cached BE count).
 
-  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
-  if (!HasNUW && HasNSW) {
+  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+  // And vice-versa.
+  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+  SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+  if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
     bool All = true;
     for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
          E = Operands.end(); I != E; ++I)
@@ -2058,7 +2112,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
         All = false;
         break;
       }
-    if (All) HasNUW = true;
+    if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   }
 
   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
@@ -2081,16 +2135,29 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
           break;
         }
       if (AllInvariant) {
-        NestedOperands[0] = getAddRecExpr(Operands, L);
+        // Create a recurrence for the outer loop with the same step size.
+        //
+        // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
+        // inner recurrence has the same property.
+        SCEV::NoWrapFlags OuterFlags =
+          maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
+
+        NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
         AllInvariant = true;
         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
           if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
             AllInvariant = false;
             break;
           }
-        if (AllInvariant)
+        if (AllInvariant) {
           // Ok, both add recurrences are valid after the transformation.
-          return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW);
+          //
+          // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
+          // the outer recurrence has the same property.
+          SCEV::NoWrapFlags InnerFlags =
+            maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
+          return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
+        }
       }
       // Reset Operands to its original state.
       Operands[0] = NestedAR;
@@ -2114,8 +2181,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                                            O, Operands.size(), L);
     UniqueSCEVs.InsertNode(S, IP);
   }
-  if (HasNUW) S->setHasNoUnsignedWrap(true);
-  if (HasNSW) S->setHasNoSignedWrap(true);
+  S->setNoWrapFlags(Flags);
   return S;
 }
 
@@ -2510,17 +2576,17 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   return getMinusSCEV(AllOnes, V);
 }
 
-/// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1,
-/// and thus the HasNUW and HasNSW bits apply to the resultant add, not
-/// whether the sub would have overflowed.
+/// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
-                                          bool HasNUW, bool HasNSW) {
+                                          SCEV::NoWrapFlags Flags) {
+  assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
   // Fast path: X - X --> 0.
   if (LHS == RHS)
     return getConstant(LHS->getType(), 0);
 
   // X - Y --> X + -Y
-  return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW);
+  return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
 }
 
 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -2652,6 +2718,36 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
   return getUMinExpr(PromotedLHS, PromotedRHS);
 }
 
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+  // A pointer operand may evaluate to a nonpointer expression, such as null.
+  if (!V->getType()->isPointerTy())
+    return V;
+
+  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+    return getPointerBase(Cast->getOperand());
+  }
+  else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+    const SCEV *PtrOp = 0;
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      if ((*I)->getType()->isPointerTy()) {
+        // Cannot find the base of an expression with multiple pointer operands.
+        if (PtrOp)
+          return V;
+        PtrOp = *I;
+      }
+    }
+    if (!PtrOp)
+      return V;
+    return getPointerBase(PtrOp);
+  }
+  return V;
+}
+
 /// PushDefUseChildren - Push users of the given Instruction
 /// onto the given Worklist.
 static void
@@ -2773,44 +2869,34 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
             if (isLoopInvariant(Accum, L) ||
                 (isa<SCEVAddRecExpr>(Accum) &&
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
-              bool HasNUW = false;
-              bool HasNSW = false;
+              SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
 
               // If the increment doesn't overflow, then neither the addrec nor
               // the post-increment will overflow.
               if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
                 if (OBO->hasNoUnsignedWrap())
-                  HasNUW = true;
+                  Flags = setFlags(Flags, SCEV::FlagNUW);
                 if (OBO->hasNoSignedWrap())
-                  HasNSW = true;
-              } else if (const GEPOperator *GEP = 
-                            dyn_cast<GEPOperator>(BEValueV)) {
-                // If the increment is a GEP, then we know it won't perform a
-                // signed overflow, because the address space cannot be
-                // wrapped around.
-                //
-                // NOTE: This isn't strictly true, because you could have an
-                // object straddling the 2G address boundary in a 32-bit address
-                // space (for example).  We really want to model this as a "has
-                // no signed/unsigned wrap" where the base pointer is treated as
-                // unsigned and the increment is known to not have signed
-                // wrapping.
-                //
-                // This is a highly theoretical concern though, and this is good
-                // enough for all cases we know of at this point. :)
-                //                
-                HasNSW |= GEP->isInBounds();
+                  Flags = setFlags(Flags, SCEV::FlagNSW);
+              } else if (const GEPOperator *GEP =
+                         dyn_cast<GEPOperator>(BEValueV)) {
+                // If the increment is an inbounds GEP, then we know the address
+                // space cannot be wrapped around. We cannot make any guarantee
+                // about signed or unsigned overflow because pointers are
+                // unsigned but we may have a negative index from the base
+                // pointer.
+                if (GEP->isInBounds())
+                  Flags = setFlags(Flags, SCEV::FlagNW);
               }
 
               const SCEV *StartVal = getSCEV(StartValueV);
-              const SCEV *PHISCEV =
-                getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
+              const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
 
               // Since the no-wrap flags are on the increment, they apply to the
               // post-incremented value as well.
               if (isLoopInvariant(Accum, L))
                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
-                                    Accum, L, HasNUW, HasNSW);
+                                    Accum, L, Flags);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and purge all of the
@@ -2834,8 +2920,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
             // initial step of the addrec evolution.
             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
                                          AddRec->getOperand(1))) {
+              // FIXME: For constant StartVal, we should be able to infer
+              // no-wrap flags.
               const SCEV *PHISCEV =
-                 getAddRecExpr(StartVal, AddRec->getOperand(1), L);
+                getAddRecExpr(StartVal, AddRec->getOperand(1), L,
+                              SCEV::FlagAnyWrap);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and purge all of the
@@ -2899,8 +2988,9 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
       IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
 
       // Multiply the index by the element size to compute the element offset.
-      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false,
-                                           /*NSW*/ isInBounds);
+      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
+                                           isInBounds ? SCEV::FlagNSW :
+                                           SCEV::FlagAnyWrap);
 
       // Add the element offset to the running total offset.
       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -2911,8 +3001,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   const SCEV *BaseS = getSCEV(Base);
 
   // Add the total offset from all the GEP indices to the base.
-  return getAddExpr(BaseS, TotalOffset, /*NUW*/ false,
-                    /*NSW*/ isInBounds);
+  return getAddExpr(BaseS, TotalOffset,
+                    isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
 }
 
 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3074,7 +3164,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     // If there's no unsigned wrap, the value will never be less than its
     // initial value.
-    if (AddRec->hasNoUnsignedWrap())
+    if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
         if (!C->getValue()->isZero())
           ConservativeResult =
@@ -3216,7 +3306,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     // If there's no signed wrap, and all the operands have the same sign or
     // zero, the value won't ever change sign.
-    if (AddRec->hasNoSignedWrap()) {
+    if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
       bool AllNonNeg = true;
       bool AllNonPos = true;
       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
@@ -3349,7 +3439,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
     SmallVector<const SCEV *, 4> MulOps;
     MulOps.push_back(getSCEV(U->getOperand(1)));
     for (Value *Op = U->getOperand(0);
-         Op->getValueID() == Instruction::Mul + Value::InstructionVal; 
+         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
          Op = U->getOperand(0)) {
       U = cast<Operator>(Op);
       MulOps.push_back(getSCEV(U->getOperand(1)));
@@ -3411,10 +3501,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
         // transfer the no-wrap flags, since an or won't introduce a wrap.
         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
-          if (OldAR->hasNoUnsignedWrap())
-            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true);
-          if (OldAR->hasNoSignedWrap())
-            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true);
+          const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+            OldAR->getNoWrapFlags());
         }
         return S;
       }
@@ -3700,19 +3788,20 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   if (!Pair.second)
     return Pair.first->second;
 
-  BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
-  if (BECount.Exact != getCouldNotCompute()) {
-    assert(isLoopInvariant(BECount.Exact, L) &&
-           isLoopInvariant(BECount.Max, L) &&
+  BackedgeTakenInfo Result = getCouldNotCompute();
+  BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
+  if (Computed.Exact != getCouldNotCompute()) {
+    assert(isLoopInvariant(Computed.Exact, L) &&
+           isLoopInvariant(Computed.Max, L) &&
            "Computed backedge-taken count isn't loop invariant for loop!");
     ++NumTripCountsComputed;
 
     // Update the value in the map.
-    Pair.first->second = BECount;
+    Result = Computed;
   } else {
-    if (BECount.Max != getCouldNotCompute())
+    if (Computed.Max != getCouldNotCompute())
       // Update the value in the map.
-      Pair.first->second = BECount;
+      Result = Computed;
     if (isa<PHINode>(L->getHeader()->begin()))
       // Only count loops that have phi nodes as not being computable.
       ++NumTripCountsNotComputed;
@@ -3723,7 +3812,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   // conservative estimates made without the benefit of trip count
   // information. This is similar to the code in forgetLoop, except that
   // it handles SCEVUnknown PHI nodes specially.
-  if (BECount.hasAnyInfo()) {
+  if (Computed.hasAnyInfo()) {
     SmallVector<Instruction *, 16> Worklist;
     PushLoopPHIs(L, Worklist);
 
@@ -3754,7 +3843,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
       PushDefUseChildren(I, Worklist);
     }
   }
-  return Pair.first->second;
+
+  // Re-lookup the insert position, since the call to
+  // ComputeBackedgeTakenCount above could result in a
+  // recusive call to getBackedgeTakenInfo (on a different
+  // loop), which would invalidate the iterator computed
+  // earlier.
+  return BackedgeTakenCounts.find(L)->second = Result;
 }
 
 /// forgetLoop - This method should be called by the client when it has
@@ -4022,105 +4117,6 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
   return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
 }
 
-static const SCEVAddRecExpr *
-isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) {
-  const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S);
-  
-  // The SCEV must be an addrec of this loop.
-  if (!SA || SA->getLoop() != L || !SA->isAffine())
-    return 0;
-  
-  // The SCEV must be known to not wrap in some way to be interesting.
-  if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap())
-    return 0;
-
-  // The stride must be a constant so that we know if it is striding up or down.
-  if (!isa<SCEVConstant>(SA->getOperand(1)))
-    return 0;
-  return SA;
-}
-
-/// getMinusSCEVForExitTest - When considering an exit test for a loop with a
-/// "x != y" exit test, we turn this into a computation that evaluates x-y != 0,
-/// and this function returns the expression to use for x-y.  We know and take
-/// advantage of the fact that this subtraction is only being used in a
-/// comparison by zero context.
-///
-static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS,
-                                           const Loop *L, ScalarEvolution &SE) {
-  // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not
-  // wrap (either NSW or NUW), then we know that the value will either become
-  // the other one (and thus the loop terminates), that the loop will terminate
-  // through some other exit condition first, or that the loop has undefined
-  // behavior.  This information is useful when the addrec has a stride that is
-  // != 1 or -1, because it means we can't "miss" the exit value.
-  //
-  // In any of these three cases, it is safe to turn the exit condition into a
-  // "counting down" AddRec (to zero) by subtracting the two inputs as normal,
-  // but since we know that the "end cannot be missed" we can force the
-  // resulting AddRec to be a NUW addrec.  Since it is counting down, this means
-  // that the AddRec *cannot* pass zero.
-
-  // See if LHS and RHS are addrec's we can handle.
-  const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L);
-  const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L);
-  
-  // If neither addrec is interesting, just return a minus.
-  if (RHSA == 0 && LHSA == 0)
-    return SE.getMinusSCEV(LHS, RHS);
-  
-  // If only one of LHS and RHS are an AddRec of this loop, make sure it is LHS.
-  if (RHSA && LHSA == 0) {
-    // Safe because a-b === b-a for comparisons against zero.
-    std::swap(LHS, RHS);
-    std::swap(LHSA, RHSA);
-  }
-  
-  // Handle the case when only one is advancing in a non-overflowing way.
-  if (RHSA == 0) {
-    // If RHS is loop varying, then we can't predict when LHS will cross it.
-    if (!SE.isLoopInvariant(RHS, L))
-      return SE.getMinusSCEV(LHS, RHS);
-    
-    // If LHS has a positive stride, then we compute RHS-LHS, because the loop
-    // is counting up until it crosses RHS (which must be larger than LHS).  If
-    // it is negative, we compute LHS-RHS because we're counting down to RHS.
-    const ConstantInt *Stride =
-      cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
-    if (Stride->getValue().isNegative())
-      std::swap(LHS, RHS);
-
-    return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/);
-  }
-  
-  // If both LHS and RHS are interesting, we have something like:
-  //  a+i*4 != b+i*8.
-  const ConstantInt *LHSStride =
-    cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
-  const ConstantInt *RHSStride =
-    cast<SCEVConstant>(RHSA->getOperand(1))->getValue();
-  
-  // If the strides are equal, then this is just a (complex) loop invariant
-  // comparison of a and b.
-  if (LHSStride == RHSStride)
-    return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart());
-  
-  // If the signs of the strides differ, then the negative stride is counting
-  // down to the positive stride.
-  if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){
-    if (RHSStride->getValue().isNegative())
-      std::swap(LHS, RHS);
-  } else {
-    // If LHS's stride is smaller than RHS's stride, then "b" must be less than
-    // "a" and "b" is RHS is counting up (catching up) to LHS.  This is true
-    // whether the strides are positive or negative.
-    if (RHSStride->getValue().slt(LHSStride->getValue()))
-      std::swap(LHS, RHS);
-  }
-    
-  return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/);
-}
-
 /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
@@ -4180,8 +4176,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L,
-                                                                 *this), L);
+    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
     if (BTI.hasAnyInfo()) return BTI;
     break;
   }
@@ -4706,7 +4701,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
       for (++i; i != e; ++i)
         NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
 
-      AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop()));
+      const SCEV *FoldedRec =
+        getAddRecExpr(NewOps, AddRec->getLoop(),
+                      AddRec->getNoWrapFlags(SCEV::FlagNW));
+      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+      // The addrec may be folded to a nonrecurrence, for example, if the
+      // induction variable is multiplied by zero after constant folding. Go
+      // ahead and return the folded value.
+      if (!AddRec)
+        return FoldedRec;
       break;
     }
 
@@ -4871,6 +4874,11 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 
 /// HowFarToZero - Return the number of times a backedge comparing the specified
 /// value to zero will execute.  If not computable, return CouldNotCompute.
+///
+/// This is only used for loops with a "x != y" exit test. The exit condition is
+/// now expressed as a single expression, V = x-y. So the exit test is
+/// effectively V != 0.  We know and take advantage of the fact that this
+/// expression only being used in a comparison by zero context.
 ScalarEvolution::BackedgeTakenInfo
 ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   // If the value is a constant
@@ -4903,7 +4911,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
                                                       R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
-        
+
         // We can only use this value if the chrec ends up with an exact zero
         // value at this index.  When solving for "X*X != 5", for example, we
         // should not accept a root of 2.
@@ -4934,26 +4942,43 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
   const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
 
-  // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up
-  // to wrap to 0, it must be counting down to equal 0.  Also, while counting
-  // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what
-  // the stride is.  As such, NUW addrec's will always become zero in
-  // "start / -stride" steps, and we know that the division is exact.
-  if (AddRec->hasNoUnsignedWrap())
-    // FIXME: We really want an "isexact" bit for udiv.
-    return getUDivExpr(Start, getNegativeSCEV(Step));
-  
   // For now we handle only constant steps.
+  //
+  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
+  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
+  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
+  // We have not yet seen any such cases.
   const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
   if (StepC == 0)
     return getCouldNotCompute();
 
-  // First, handle unitary steps.
-  if (StepC->getValue()->equalsInt(1))      // 1*N = -Start (mod 2^BW), so:
-    return getNegativeSCEV(Start);          //   N = -Start (as unsigned)
-  
-  if (StepC->getValue()->isAllOnesValue())  // -1*N = -Start (mod 2^BW), so:
-    return Start;                           //    N = Start (as unsigned)
+  // For positive steps (counting up until unsigned overflow):
+  //   N = -Start/Step (as unsigned)
+  // For negative steps (counting down to zero):
+  //   N = Start/-Step
+  // First compute the unsigned distance from zero in the direction of Step.
+  bool CountDown = StepC->getValue()->getValue().isNegative();
+  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+
+  // Handle unitary steps, which cannot wraparound.
+  // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+  //   N = Distance (as unsigned)
+  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
+    return Distance;
+
+  // If the recurrence is known not to wraparound, unsigned divide computes the
+  // back edge count. We know that the value will either become zero (and thus
+  // the loop terminates), that the loop will terminate through some other exit
+  // condition first, or that the loop has undefined behavior.  This means
+  // we can't "miss" the exit value, even with nonunit stride.
+  //
+  // FIXME: Prove that loops always exhibits *acceptable* undefined
+  // behavior. Loops must exhibit defined behavior until a wrapped value is
+  // actually used. So the trip count computed by udiv could be smaller than the
+  // number of well-defined iterations.
+  if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+    // FIXME: We really want an "isexact" bit for udiv.
+    return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
 
   // Then, try to solve the above equation provided that Start is constant.
   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
@@ -5220,12 +5245,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   case ICmpInst::ICMP_SLE:
     if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
-                       /*HasNUW=*/false, /*HasNSW=*/true);
+                       SCEV::FlagNSW);
       Pred = ICmpInst::ICMP_SLT;
       Changed = true;
     } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
-                       /*HasNUW=*/false, /*HasNSW=*/true);
+                       SCEV::FlagNSW);
       Pred = ICmpInst::ICMP_SLT;
       Changed = true;
     }
@@ -5233,12 +5258,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   case ICmpInst::ICMP_SGE:
     if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
-                       /*HasNUW=*/false, /*HasNSW=*/true);
+                       SCEV::FlagNSW);
       Pred = ICmpInst::ICMP_SGT;
       Changed = true;
     } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
-                       /*HasNUW=*/false, /*HasNSW=*/true);
+                       SCEV::FlagNSW);
       Pred = ICmpInst::ICMP_SGT;
       Changed = true;
     }
@@ -5246,12 +5271,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   case ICmpInst::ICMP_ULE:
     if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
-                       /*HasNUW=*/true, /*HasNSW=*/false);
+                       SCEV::FlagNUW);
       Pred = ICmpInst::ICMP_ULT;
       Changed = true;
     } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
-                       /*HasNUW=*/true, /*HasNSW=*/false);
+                       SCEV::FlagNUW);
       Pred = ICmpInst::ICMP_ULT;
       Changed = true;
     }
@@ -5259,12 +5284,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   case ICmpInst::ICMP_UGE:
     if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
-                       /*HasNUW=*/true, /*HasNSW=*/false);
+                       SCEV::FlagNUW);
       Pred = ICmpInst::ICMP_UGT;
       Changed = true;
     } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
-                       /*HasNUW=*/true, /*HasNSW=*/false);
+                       SCEV::FlagNUW);
       Pred = ICmpInst::ICMP_UGT;
       Changed = true;
     }
@@ -5646,6 +5671,13 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
          "This code doesn't handle negative strides yet!");
 
   const Type *Ty = Start->getType();
+
+  // When Start == End, we have an exact BECount == 0. Short-circuit this case
+  // here because SCEV may not be able to determine that the unsigned division
+  // after rounding is zero.
+  if (Start == End)
+    return getConstant(Ty, 0);
+
   const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
   const SCEV *Diff = getMinusSCEV(End, Start);
   const SCEV *RoundUp = getAddExpr(Step, NegOne);
@@ -5683,8 +5715,8 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     return getCouldNotCompute();
 
   // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() :
-                           AddRec->hasNoUnsignedWrap();
+  bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
+                           AddRec->getNoWrapFlags(SCEV::FlagNUW);
 
   if (AddRec->isAffine()) {
     unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
@@ -5768,7 +5800,16 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 
     // The maximum backedge count is similar, except using the minimum start
     // value and the maximum end value.
-    const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);
+    // If we already have an exact constant BECount, use it instead.
+    const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
+      : getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+    // If the stride is nonconstant, and NoWrap == true, then
+    // getBECount(MinStart, MaxEnd) may not compute. This would result in an
+    // exact BECount and invalid MaxBECount, which should be avoided to catch
+    // more optimization opportunities.
+    if (isa<SCEVCouldNotCompute>(MaxBECount))
+      MaxBECount = BECount;
 
     return BackedgeTakenInfo(BECount, MaxBECount);
   }
@@ -5791,7 +5832,8 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     if (!SC->getValue()->isZero()) {
       SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
       Operands[0] = SE.getConstant(SC->getType(), 0);
-      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
+      const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
+                                             getNoWrapFlags(FlagNW));
       if (const SCEVAddRecExpr *ShiftedAddRec =
             dyn_cast<SCEVAddRecExpr>(Shifted))
         return ShiftedAddRec->getNumIterationsInRange(
@@ -5852,7 +5894,9 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     // Range.getUpper() is crossed.
     SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
     NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
-    const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
+    const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
+                                             // getNoWrapFlags(FlagNW)
+                                             FlagAnyWrap);
 
     // Next, solve the constructed addrec
     std::pair<const SCEV *,const SCEV *> Roots =
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index b7c110f..8e5a400 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -262,7 +262,8 @@ static bool FactorOutConstant(const SCEV *&S,
     const SCEV *Start = A->getStart();
     if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
       return false;
-    S = SE.getAddRecExpr(Start, Step, A->getLoop());
+    // FIXME: can use A->getNoWrapFlags(FlagNW)
+    S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
     return true;
   }
 
@@ -314,7 +315,9 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
       const SCEV *Zero = SE.getConstant(Ty, 0);
       AddRecs.push_back(SE.getAddRecExpr(Zero,
                                          A->getStepRecurrence(SE),
-                                         A->getLoop()));
+                                         A->getLoop(),
+                                         // FIXME: A->getNoWrapFlags(FlagNW)
+                                         SCEV::FlagAnyWrap));
       if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
         Ops[i] = Zero;
         Ops.append(Add->op_begin(), Add->op_end());
@@ -823,7 +826,9 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
     Rest = SE.getAddExpr(Rest,
                          SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                           A->getStepRecurrence(SE),
-                                          A->getLoop()));
+                                          A->getLoop(),
+                                          // FIXME: A->getNoWrapFlags(FlagNW)
+                                          SCEV::FlagAnyWrap));
   }
   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
     Base = A->getOperand(A->getNumOperands()-1);
@@ -858,7 +863,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
         // loop already visited by LSR for example, but it wouldn't have
         // to be.
         do {
-          if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) {
+          if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+              (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
             IncV = 0;
             break;
           }
@@ -926,14 +932,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
 
   // Create the PHI.
-  Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin());
-  PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv");
+  BasicBlock *Header = L->getHeader();
+  Builder.SetInsertPoint(Header, Header->begin());
+  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
   rememberInstruction(PN);
 
   // Create the step instructions and populate the PHI.
-  BasicBlock *Header = L->getHeader();
-  for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
-       HPI != HPE; ++HPI) {
+  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
     BasicBlock *Pred = *HPI;
 
     // Add a start value.
@@ -1004,10 +1010,11 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   if (!SE.properlyDominates(Start, L->getHeader())) {
     PostLoopOffset = Start;
     Start = SE.getConstant(Normalized->getType(), 0);
-    Normalized =
-      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start,
-                                            Normalized->getStepRecurrence(SE),
-                                            Normalized->getLoop()));
+    Normalized = cast<SCEVAddRecExpr>(
+      SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
+                       Normalized->getLoop(),
+                       // FIXME: Normalized->getNoWrapFlags(FlagNW)
+                       SCEV::FlagAnyWrap));
   }
 
   // Strip off any non-loop-dominating component from the addrec step.
@@ -1018,7 +1025,10 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
     Step = SE.getConstant(Normalized->getType(), 1);
     Normalized =
       cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
-                                            Normalized->getLoop()));
+                                            Normalized->getLoop(),
+                                            // FIXME: Normalized
+                                            // ->getNoWrapFlags(FlagNW)
+                                            SCEV::FlagAnyWrap));
   }
 
   // Expand the core addrec. If we need post-loop scaling, force it to
@@ -1081,7 +1091,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
     for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
       NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
-    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop()));
+    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
+                                       // FIXME: S->getNoWrapFlags(FlagNW)
+                                       SCEV::FlagAnyWrap));
     BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
     BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
     BasicBlock::iterator NewInsertPt =
@@ -1098,7 +1110,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   if (!S->getStart()->isZero()) {
     SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
     NewOps[0] = SE.getConstant(Ty, 0);
-    const SCEV *Rest = SE.getAddRecExpr(NewOps, L);
+    // FIXME: can use S->getNoWrapFlags()
+    const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
 
     // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
     // comments on expandAddToGEP for details.
@@ -1128,12 +1141,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     // Create and insert the PHI node for the induction variable in the
     // specified loop.
     BasicBlock *Header = L->getHeader();
-    CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin());
+    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
+    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
+                                  Header->begin());
     rememberInstruction(CanonicalIV);
 
     Constant *One = ConstantInt::get(Ty, 1);
-    for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
-         HPI != HPE; ++HPI) {
+    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
       BasicBlock *HP = *HPI;
       if (L->contains(HP)) {
         // Insert a unit add instruction right before the terminator
@@ -1333,7 +1347,7 @@ void SCEVExpander::rememberInstruction(Value *I) {
     InsertedValues.insert(I);
 
   // If we just claimed an existing instruction and that instruction had
-  // been the insert point, adjust the insert point forward so that 
+  // been the insert point, adjust the insert point forward so that
   // subsequently inserted code will be dominated.
   if (Builder.GetInsertPoint() == I) {
     BasicBlock::iterator It = cast<Instruction>(I);
@@ -1361,8 +1375,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
   assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
 
   // Build a SCEV for {0,+,1}<L>.
+  // Conservatively use FlagAnyWrap for now.
   const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
-                                   SE.getConstant(Ty, 1), L);
+                                   SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
 
   // Emit code for it.
   BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index ac36cef..60e630a 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -97,7 +97,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
       const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
       Operands.push_back(N);
     }
-    const SCEV *Result = SE.getAddRecExpr(Operands, L);
+    // Conservatively use AnyWrap until/unless we need FlagNW.
+    const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
     switch (Kind) {
     default: llvm_unreachable("Unexpected transform name!");
     case NormalizeAutodetect:
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 40e18ab..0faf1398 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -31,7 +31,7 @@
 //
 // The second field identifies the type's parent node in the tree, or
 // is null or omitted for a root node. A type is considered to alias
-// all of its decendents and all of its ancestors in the tree. Also,
+// all of its descendants and all of its ancestors in the tree. Also,
 // a type is considered to alias all types in other trees, so that
 // bitcode produced from multiple front-ends is handled conservatively.
 //
@@ -59,6 +59,7 @@
 
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/Constants.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Metadata.h"
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 1060bc5..8f18dd2 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -429,6 +429,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       KnownZero |= LHSKnownZero & Mask;
       KnownOne  |= LHSKnownOne & Mask;
     }
+
+    // Are we still trying to solve for the sign bit?
+    if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
+      OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
+      if (OBO->hasNoSignedWrap()) {
+        if (I->getOpcode() == Instruction::Add) {
+          // Adding two positive numbers can't wrap into negative
+          if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+            KnownZero |= APInt::getSignBit(BitWidth);
+          // and adding two negative numbers can't wrap into positive.
+          else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+            KnownOne |= APInt::getSignBit(BitWidth);
+        } else {
+          // Subtracting a negative number from a positive one can't wrap
+          if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+            KnownZero |= APInt::getSignBit(BitWidth);
+          // neither can subtracting a positive number from a negative one.
+          else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+            KnownOne |= APInt::getSignBit(BitWidth);
+        }
+      }
+    }
+
     return;
   }
   case Instruction::SRem:
@@ -460,6 +483,19 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
         assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
       }
     }
+
+    // The sign bit is the LHS's sign bit, except when the result of the
+    // remainder is zero.
+    if (Mask.isNegative() && KnownZero.isNonNegative()) {
+      APInt Mask2 = APInt::getSignBit(BitWidth);
+      APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+      ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+                        Depth+1);
+      // If it's known zero, our sign bit is also zero.
+      if (LHSKnownZero.isNegative())
+        KnownZero |= LHSKnownZero;
+    }
+
     break;
   case Instruction::URem: {
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -597,6 +633,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Otherwise take the unions of the known bit sets of the operands,
     // taking conservative care to avoid excessive recursion.
     if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+      // Skip if every incoming value references to ourself.
+      if (P->hasConstantValue() == P)
+        break;
+
       KnownZero = APInt::getAllOnesValue(BitWidth);
       KnownOne = APInt::getAllOnesValue(BitWidth);
       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
@@ -684,6 +724,16 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
     return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
       isPowerOfTwo(SI->getFalseValue(), TD, Depth);
 
+  // An exact divide or right shift can only shift off zero bits, so the result
+  // is a power of two only if the first operand is a power of two and not
+  // copying a sign bit (sdiv int_min, 2).
+  if (match(V, m_LShr(m_Value(), m_Value())) ||
+      match(V, m_UDiv(m_Value(), m_Value()))) {
+    PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
+    if (PEO->isExact())
+      return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+  }
+
   return false;
 }
 
@@ -720,6 +770,11 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
   // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
   // if the lowest bit is shifted off the end.
   if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+    // shl nuw can't remove any non-zero bits.
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->hasNoUnsignedWrap())
+      return isKnownNonZero(X, TD, Depth);
+
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
     ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
@@ -729,11 +784,22 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
   // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
   // defined if the sign bit is shifted off the end.
   else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+    // shr exact can only shift out zero bits.
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->isExact())
+      return isKnownNonZero(X, TD, Depth);
+
     bool XKnownNonNegative, XKnownNegative;
     ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
     if (XKnownNegative)
       return true;
   }
+  // div exact can only produce a zero if the dividend is zero.
+  else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->isExact())
+      return isKnownNonZero(X, TD, Depth);
+  }
   // X + Y.
   else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
     bool XKnownNonNegative, XKnownNegative;
@@ -1262,7 +1328,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
         break;
       }
     }
-    // If we succesfully found a value for each of our subaggregates 
+    // If we successfully found a value for each of our subaggregates
     if (To)
       return To;
   }
@@ -1691,7 +1757,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
     } else {
       // See if InstructionSimplify knows any relevant tricks.
       if (Instruction *I = dyn_cast<Instruction>(V))
-        // TODO: Aquire a DominatorTree and use it.
+        // TODO: Acquire a DominatorTree and use it.
         if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
           V = Simplified;
           continue;
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index c5ad5fc..8fcc7aa 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/system_error.h"
 #include <fstream>
@@ -28,7 +27,7 @@ using namespace llvm;
 
 // Write an integer using variable bit rate encoding. This saves a few bytes
 // per entry in the symbol table.
-static inline void writeInteger(unsigned num, raw_ostream& ARFile) {
+static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
   while (1) {
     if (num < 0x80) { // done?
       ARFile << (unsigned char)num;
@@ -202,14 +201,14 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
 bool
 Archive::writeMember(
   const ArchiveMember& member,
-  raw_ostream& ARFile,
+  std::ofstream& ARFile,
   bool CreateSymbolTable,
   bool TruncateNames,
   bool ShouldCompress,
   std::string* ErrMsg
 ) {
 
-  unsigned filepos = ARFile.tell();
+  unsigned filepos = ARFile.tellp();
   filepos -= 8;
 
   // Get the data and its size either from the
@@ -282,7 +281,7 @@ Archive::writeMember(
   ARFile.write(data,fSize);
 
   // Make sure the member is an even length
-  if ((ARFile.tell() & 1) == 1)
+  if ((ARFile.tellp() & 1) == 1)
     ARFile << ARFILE_PAD;
 
   // Close the mapped file if it was opened
@@ -292,7 +291,7 @@ Archive::writeMember(
 
 // Write out the LLVM symbol table as an archive member to the file.
 void
-Archive::writeSymbolTable(raw_ostream& ARFile) {
+Archive::writeSymbolTable(std::ofstream& ARFile) {
 
   // Construct the symbol table's header
   ArchiveMemberHeader Hdr;
@@ -316,7 +315,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) {
 
 #ifndef NDEBUG
   // Save the starting position of the symbol tables data content.
-  unsigned startpos = ARFile.tell();
+  unsigned startpos = ARFile.tellp();
 #endif
 
   // Write out the symbols sequentially
@@ -333,7 +332,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) {
 
 #ifndef NDEBUG
   // Now that we're done with the symbol table, get the ending file position
-  unsigned endpos = ARFile.tell();
+  unsigned endpos = ARFile.tellp();
 #endif
 
   // Make sure that the amount we wrote is what we pre-computed. This is
@@ -362,20 +361,25 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
   }
 
   // Create a temporary file to store the archive in
-  SmallString<128> TempArchivePath;
-  int ArchFD;
-  if (error_code ec =
-      sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
-                           ArchFD, TempArchivePath)) {
-    if (ErrMsg) *ErrMsg = ec.message();
+  sys::Path TmpArchive = archPath;
+  if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
     return true;
-  }
 
   // Make sure the temporary gets removed if we crash
-  sys::RemoveFileOnSignal(sys::Path(TempArchivePath.str()));
+  sys::RemoveFileOnSignal(TmpArchive);
 
   // Create archive file for output.
-  raw_fd_ostream ArchiveFile(ArchFD, true);
+  std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
+                               std::ios::binary;
+  std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
+
+  // Check for errors opening or creating archive file.
+  if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
+    TmpArchive.eraseFromDisk();
+    if (ErrMsg)
+      *ErrMsg = "Error opening archive file: " + archPath.str();
+    return true;
+  }
 
   // If we're creating a symbol table, reset it now
   if (CreateSymbolTable) {
@@ -391,9 +395,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
   for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
     if (writeMember(*I, ArchiveFile, CreateSymbolTable,
                      TruncateNames, Compress, ErrMsg)) {
+      TmpArchive.eraseFromDisk();
       ArchiveFile.close();
-      bool existed;
-      sys::fs::remove(TempArchivePath.str(), existed);
       return true;
     }
   }
@@ -408,12 +411,12 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     // ensure compatibility with other archivers we need to put the symbol
     // table first in the file. Unfortunately, this means mapping the file
     // we just wrote back in and copying it to the destination file.
-    SmallString<128> TempArchiveWithSymbolTablePath;
+    sys::Path FinalFilePath = archPath;
 
     // Map in the archive we just wrote.
     {
     OwningPtr<MemoryBuffer> arch;
-    if (error_code ec = MemoryBuffer::getFile(TempArchivePath.c_str(), arch)) {
+    if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) {
       if (ErrMsg)
         *ErrMsg = ec.message();
       return true;
@@ -422,15 +425,17 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
 
     // Open another temporary file in order to avoid invalidating the
     // mmapped data
-    if (error_code ec =
-      sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
-                           ArchFD, TempArchiveWithSymbolTablePath)) {
-      if (ErrMsg) *ErrMsg = ec.message();
+    if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
       return true;
-    }
-    sys::RemoveFileOnSignal(sys::Path(TempArchiveWithSymbolTablePath.str()));
+    sys::RemoveFileOnSignal(FinalFilePath);
 
-    raw_fd_ostream FinalFile(ArchFD, true);
+    std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
+    if (!FinalFile.is_open() || FinalFile.bad()) {
+      TmpArchive.eraseFromDisk();
+      if (ErrMsg)
+        *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+      return true;
+    }
 
     // Write the file magic number
     FinalFile << ARFILE_MAGIC;
@@ -443,8 +448,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     if (foreignST) {
       if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
         FinalFile.close();
-        bool existed;
-        sys::fs::remove(TempArchiveWithSymbolTablePath.str(), existed);
+        TmpArchive.eraseFromDisk();
         return true;
       }
     }
@@ -462,11 +466,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     } // free arch.
 
     // Move the final file over top of TmpArchive
-    if (error_code ec = sys::fs::rename(TempArchiveWithSymbolTablePath.str(),
-                                        TempArchivePath.str())) {
-      if (ErrMsg) *ErrMsg = ec.message();
+    if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
       return true;
-    }
   }
 
   // Before we replace the actual archive, we need to forget all the
@@ -474,11 +475,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
   // this because we cannot replace an open file on Windows.
   cleanUpMemory();
 
-  if (error_code ec = sys::fs::rename(TempArchivePath.str(),
-                                      archPath.str())) {
-    if (ErrMsg) *ErrMsg = ec.message();
+  if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
     return true;
-  }
 
   // Set correct read and write permissions after temporary file is moved
   // to final destination path.
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cdfacbe..a2c53be 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -249,11 +249,7 @@ bool LLParser::ParseModuleAsm() {
   if (ParseToken(lltok::kw_asm, "expected 'module asm'") ||
       ParseStringConstant(AsmStr)) return true;
 
-  const std::string &AsmSoFar = M->getModuleInlineAsm();
-  if (AsmSoFar.empty())
-    M->setModuleInlineAsm(AsmStr);
-  else
-    M->setModuleInlineAsm(AsmSoFar+"\n"+AsmStr);
+  M->appendModuleInlineAsm(AsmStr);
   return false;
 }
 
@@ -518,7 +514,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
   if (Result) return false;
 
   // Otherwise, create MDNode forward reference.
-  MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0);
+  MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
   ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
   
   if (NumberedMetadata.size() <= MID)
@@ -576,7 +572,7 @@ bool LLParser::ParseStandaloneMetadata() {
       ParseToken(lltok::rbrace, "expected end of metadata node"))
     return true;
 
-  MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size());
+  MDNode *Init = MDNode::get(Context, Elts);
   
   // See if this was forward referenced, if so, handle it.
   std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
@@ -2502,7 +2498,7 @@ bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
       ParseToken(lltok::rbrace, "expected end of metadata node"))
     return true;
 
-  ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+  ID.MDNodeVal = MDNode::get(Context, Elts);
   ID.Kind = ValID::t_MDNode;
   return false;
 }
@@ -3638,8 +3634,7 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
   if (!Ty->isFirstClassType())
     return Error(TypeLoc, "phi node must have first class type");
 
-  PHINode *PN = PHINode::Create(Ty);
-  PN->reserveOperandSpace(PHIVals.size());
+  PHINode *PN = PHINode::Create(Ty, PHIVals.size());
   for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
     PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
   Inst = PN;
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index dbf8da0..19f57cf 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -301,8 +301,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
         NewC = ConstantVector::get(NewOps);
       } else {
         assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
-        NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
-                                                          NewOps.size());
+        NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
       }
 
       UserC->replaceAllUsesWith(NewC);
@@ -350,7 +349,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
   }
 
   // Create and return a placeholder, which will later be RAUW'd.
-  Value *V = MDNode::getTemporary(Context, 0, 0);
+  Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
   MDValuePtrs[Idx] = V;
   return V;
 }
@@ -844,9 +843,7 @@ bool BitcodeReader::ParseMetadata() {
         else
           Elts.push_back(NULL);
       }
-      Value *V = MDNode::getWhenValsUnresolved(Context,
-                                               Elts.data(), Elts.size(),
-                                               IsFunctionLocal);
+      Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal);
       IsFunctionLocal = false;
       MDValueList.AssignValue(V, NextMDValueNo++);
       break;
@@ -2288,9 +2285,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       const Type *Ty = getTypeByID(Record[0]);
       if (!Ty) return Error("Invalid PHI record");
 
-      PHINode *PN = PHINode::Create(Ty);
+      PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
       InstructionList.push_back(PN);
-      PN->reserveOperandSpace((Record.size()-1)/2);
 
       for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
         Value *V = getFnValueByID(Record[1+i], Ty);
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index f8ef8c6..e34137f 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -197,7 +197,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
 
   // Loop over all of the types, emitting each in turn.
   for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
-    const Type *T = TypeList[i].first;
+    const Type *T = TypeList[i];
     int AbbrevToUse = 0;
     unsigned Code = 0;
 
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 2f02262..21f004a 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -12,6 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "ValueEnumerator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
@@ -21,22 +23,10 @@
 #include <algorithm>
 using namespace llvm;
 
-static bool isSingleValueType(const std::pair<const llvm::Type*,
-                              unsigned int> &P) {
-  return P.first->isSingleValueType();
-}
-
 static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
   return V.first->getType()->isIntegerTy();
 }
 
-static bool CompareByFrequency(const std::pair<const llvm::Type*,
-                               unsigned int> &P1,
-                               const std::pair<const llvm::Type*,
-                               unsigned int> &P2) {
-  return P1.second > P2.second;
-}
-
 /// ValueEnumerator - Enumerate module-level information.
 ValueEnumerator::ValueEnumerator(const Module *M) {
   // Enumerate the global variables.
@@ -120,18 +110,72 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
   // Optimize constant ordering.
   OptimizeConstants(FirstConstant, Values.size());
 
-  // Sort the type table by frequency so that most commonly used types are early
-  // in the table (have low bit-width).
-  std::stable_sort(Types.begin(), Types.end(), CompareByFrequency);
-
-  // Partition the Type ID's so that the single-value types occur before the
-  // aggregate types.  This allows the aggregate types to be dropped from the
-  // type table after parsing the global variable initializers.
-  std::partition(Types.begin(), Types.end(), isSingleValueType);
+  OptimizeTypes();
 
   // Now that we rearranged the type table, rebuild TypeMap.
   for (unsigned i = 0, e = Types.size(); i != e; ++i)
-    TypeMap[Types[i].first] = i+1;
+    TypeMap[Types[i]] = i+1;
+}
+
+struct TypeAndDeps {
+  const Type *Ty;
+  unsigned NumDeps;
+};
+
+static int CompareByDeps(const void *a, const void *b) {
+  const TypeAndDeps &ta = *(const TypeAndDeps*) a;
+  const TypeAndDeps &tb = *(const TypeAndDeps*) b;
+  return ta.NumDeps - tb.NumDeps;
+}
+
+static void VisitType(const Type *Ty, SmallPtrSet<const Type*, 16> &Visited,
+                      std::vector<const Type*> &Out) {
+  if (Visited.count(Ty))
+    return;
+
+  Visited.insert(Ty);
+
+  for (Type::subtype_iterator I2 = Ty->subtype_begin(),
+         E2 = Ty->subtype_end(); I2 != E2; ++I2) {
+    const Type *InnerType = I2->get();
+    VisitType(InnerType, Visited, Out);
+  }
+
+  Out.push_back(Ty);
+}
+
+void ValueEnumerator::OptimizeTypes(void) {
+  // If the types form a DAG, this will compute a topological sort and
+  // no forward references will be needed when reading them in.
+  // If there are cycles, this is a simple but reasonable heuristic for
+  // the minimum feedback arc set problem.
+  const unsigned NumTypes = Types.size();
+  std::vector<TypeAndDeps> TypeDeps;
+  TypeDeps.resize(NumTypes);
+
+  for (unsigned I = 0; I < NumTypes; ++I) {
+    const Type *Ty = Types[I];
+    TypeDeps[I].Ty = Ty;
+    TypeDeps[I].NumDeps = 0;
+  }
+
+  for (unsigned I = 0; I < NumTypes; ++I) {
+    const Type *Ty = TypeDeps[I].Ty;
+    for (Type::subtype_iterator I2 = Ty->subtype_begin(),
+           E2 = Ty->subtype_end(); I2 != E2; ++I2) {
+      const Type *InnerType = I2->get();
+      unsigned InnerIndex = TypeMap.lookup(InnerType) - 1;
+      TypeDeps[InnerIndex].NumDeps++;
+    }
+  }
+  array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps);
+
+  SmallPtrSet<const Type*, 16> Visited;
+  Types.clear();
+  Types.reserve(NumTypes);
+  for (unsigned I = 0; I < NumTypes; ++I) {
+    VisitType(TypeDeps[I].Ty, Visited, Types);
+  }
 }
 
 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
@@ -319,7 +363,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
       // Initializers for globals are handled explicitly elsewhere.
     } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
       // Do not enumerate the initializers for an array of simple characters.
-      // The initializers just polute the value table, and we emit the strings
+      // The initializers just pollute the value table, and we emit the strings
       // specially.
     } else if (C->getNumOperands()) {
       // If a constant has operands, enumerate them.  This makes sure that if a
@@ -352,14 +396,12 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
 void ValueEnumerator::EnumerateType(const Type *Ty) {
   unsigned &TypeID = TypeMap[Ty];
 
-  if (TypeID) {
-    // If we've already seen this type, just increase its occurrence count.
-    Types[TypeID-1].second++;
+  // We've already seen this type.
+  if (TypeID)
     return;
-  }
 
   // First time we saw this type, add it.
-  Types.push_back(std::make_pair(Ty, 1U));
+  Types.push_back(Ty);
   TypeID = Types.size();
 
   // Enumerate subtypes.
@@ -381,7 +423,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
     // This constant may have operands, make sure to enumerate the types in
     // them.
     for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-      const User *Op = C->getOperand(i);
+      const Value *Op = C->getOperand(i);
       
       // Don't enumerate basic blocks here, this happens as operands to
       // blockaddress.
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index cd1d237..1e42a26 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -36,8 +36,7 @@ class MDSymbolTable;
 
 class ValueEnumerator {
 public:
-  // For each type, we remember its Type* and occurrence frequency.
-  typedef std::vector<std::pair<const Type*, unsigned> > TypeList;
+  typedef std::vector<const Type*> TypeList;
 
   // For each value, we remember its Value* and occurrence frequency.
   typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
@@ -136,6 +135,7 @@ public:
 
 private:
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+  void OptimizeTypes();
     
   void EnumerateMDNodeOperands(const MDNode *N);
   void EnumerateMetadata(const Value *MD);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index e2838c3..80118f0 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory(Transforms)
 add_subdirectory(Linker)
 add_subdirectory(Analysis)
 add_subdirectory(MC)
+add_subdirectory(CompilerDriver)
 add_subdirectory(Object)
 add_subdirectory(ExecutionEngine)
 add_subdirectory(Target)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index b520d8f..5c809f7 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -357,7 +357,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
     RegRefs = State->GetRegRefs();
 
   // Handle dead defs by simulating a last-use of the register just
-  // after the def. A dead def can occur because the def is truely
+  // after the def. A dead def can occur because the def is truly
   // dead, or because only a subregister is live at the def. If we
   // don't do this the dead def will be incorrectly merged into the
   // previous def.
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 36638c3..125e641 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -211,7 +211,6 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
   const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
-  const Function *F = ExitBB->getParent();
 
   // The block must end in a return statement or unreachable.
   //
@@ -250,6 +249,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
 
   // Conservatively require the attributes of the call to match those of
   // the return. Ignore noalias because it doesn't affect the call sequence.
+  const Function *F = ExitBB->getParent();
   unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
   if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
     return false;
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 0000000..0db28a6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,87 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A)
+  : DwarfException(A),
+    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+    {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+  Asm->OutStreamer.EmitFnStart();
+  if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+  if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory)
+    Asm->OutStreamer.EmitCantUnwind();
+  else {
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                  Asm->getFunctionNumber()));
+
+    // Emit references to personality.
+    if (const Function * Personality =
+        MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+      MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+      Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+      Asm->OutStreamer.EmitPersonality(PerSym);
+    }
+
+    // Map all labels and get rid of any dead landing pads.
+    MMI->TidyLandingPads();
+
+    Asm->OutStreamer.EmitHandlerData();
+
+    // Emit actual exception table
+    EmitExceptionTable();
+  }
+
+  Asm->OutStreamer.EmitFnEnd();
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9cb882e..8116f8d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -33,10 +33,12 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/SmallString.h"
@@ -70,17 +72,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
   unsigned NumBits = 0;
   if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
     NumBits = TD.getPreferredAlignmentLog(GVar);
-  
+
   // If InBits is specified, round it to it.
   if (InBits > NumBits)
     NumBits = InBits;
-  
+
   // If the GV has a specified alignment, take it into account.
   if (GV->getAlignment() == 0)
     return NumBits;
-  
+
   unsigned GVAlign = Log2_32(GV->getAlignment());
-  
+
   // If the GVAlign is larger than NumBits, or if we are required to obey
   // NumBits because the GV has an assigned section, obey it.
   if (GVAlign > NumBits || GV->hasSection())
@@ -104,16 +106,16 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
 
 AsmPrinter::~AsmPrinter() {
   assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
-  
+
   if (GCMetadataPrinters != 0) {
     gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
-    
+
     for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
       delete I->second;
     delete &GCMap;
     GCMetadataPrinters = 0;
   }
-  
+
   delete &OutStreamer;
 }
 
@@ -156,9 +158,9 @@ bool AsmPrinter::doInitialization(Module &M) {
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
-  
+
   Mang = new Mangler(OutContext, *TM.getTargetData());
-  
+
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
 
@@ -196,6 +198,9 @@ bool AsmPrinter::doInitialization(Module &M) {
     case ExceptionHandling::DwarfCFI:
       DE = new DwarfCFIException(this);
       break;
+    case ExceptionHandling::ARM:
+      DE = new ARMException(this);
+      break;
     }
 
   return false;
@@ -250,56 +255,52 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
 
 /// EmitGlobalVariable - Emit the specified global variable to the .s file.
 void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
-  if (!GV->hasInitializer())   // External globals require no code.
-    return;
-  
-  // Check to see if this is a special global used by LLVM, if so, emit it.
-  if (EmitSpecialLLVMGlobal(GV))
-    return;
+  if (GV->hasInitializer()) {
+    // Check to see if this is a special global used by LLVM, if so, emit it.
+    if (EmitSpecialLLVMGlobal(GV))
+      return;
 
-  if (isVerbose()) {
-    WriteAsOperand(OutStreamer.GetCommentOS(), GV,
-                   /*PrintType=*/false, GV->getParent());
-    OutStreamer.GetCommentOS() << '\n';
+    if (isVerbose()) {
+      WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+                     /*PrintType=*/false, GV->getParent());
+      OutStreamer.GetCommentOS() << '\n';
+    }
   }
-  
+
   MCSymbol *GVSym = Mang->getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility());
 
+  if (!GV->hasInitializer())   // External globals require no extra code.
+    return;
+
   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
-  
+
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
   const TargetData *TD = TM.getTargetData();
   uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
-  
+
   // If the alignment is specified, we *must* obey it.  Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
   // sections and expected to be contiguous (e.g. ObjC metadata).
   unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
-  
+
   // Handle common and BSS local symbols (.lcomm).
   if (GVKind.isCommon() || GVKind.isBSSLocal()) {
     if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
-    
-    if (isVerbose()) {
-      WriteAsOperand(OutStreamer.GetCommentOS(), GV,
-                     /*PrintType=*/false, GV->getParent());
-      OutStreamer.GetCommentOS() << '\n';
-    }
-    
+
     // Handle common symbols.
     if (GVKind.isCommon()) {
       unsigned Align = 1 << AlignLog;
       if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
         Align = 0;
-          
+
       // .comm _foo, 42, 4
       OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
       return;
     }
-    
+
     // Handle local BSS symbols.
     if (MAI->hasMachoZeroFillDirective()) {
       const MCSection *TheSection =
@@ -308,7 +309,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
       return;
     }
-    
+
     if (MAI->hasLCOMMDirective()) {
       // .lcomm _foo, 42
       OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
@@ -318,14 +319,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     unsigned Align = 1 << AlignLog;
     if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
       Align = 0;
-    
+
     // .local _foo
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
     // .comm _foo, 42, 4
     OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
     return;
   }
-  
+
   const MCSection *TheSection =
     getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
 
@@ -333,14 +334,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // emission.
   if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
     if (Size == 0) Size = 1;  // zerofill of 0 bytes is undefined.
-    
+
     // .globl _foo
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
     // .zerofill __DATA, __common, _foo, 400, 5
     OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
     return;
   }
-  
+
   // Handle thread local data for mach-o which requires us to output an
   // additional structure of data and mangle the original symbol so that we
   // can reference it later.
@@ -353,31 +354,31 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // specific code.
   if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
     // Emit the .tbss symbol
-    MCSymbol *MangSym = 
+    MCSymbol *MangSym =
       OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
-    
+
     if (GVKind.isThreadBSS())
       OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
     else if (GVKind.isThreadData()) {
       OutStreamer.SwitchSection(TheSection);
 
-      EmitAlignment(AlignLog, GV);      
+      EmitAlignment(AlignLog, GV);
       OutStreamer.EmitLabel(MangSym);
-      
+
       EmitGlobalConstant(GV->getInitializer());
     }
-    
+
     OutStreamer.AddBlankLine();
-    
+
     // Emit the variable struct for the runtime.
-    const MCSection *TLVSect 
+    const MCSection *TLVSect
       = getObjFileLowering().getTLSExtraDataSection();
-      
+
     OutStreamer.SwitchSection(TLVSect);
     // Emit the linkage here.
     EmitLinkage(GV->getLinkage(), GVSym);
     OutStreamer.EmitLabel(GVSym);
-    
+
     // Three pointers in size:
     //   - __tlv_bootstrap - used to make sure support exists
     //   - spare pointer, used when mapped by the runtime
@@ -387,7 +388,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
                           PtrSize, 0);
     OutStreamer.EmitIntValue(0, PtrSize, 0);
     OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
-    
+
     OutStreamer.AddBlankLine();
     return;
   }
@@ -404,7 +405,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   if (MAI->hasDotTypeDotSizeDirective())
     // .size foo, 42
     OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
-  
+
   OutStreamer.AddBlankLine();
 }
 
@@ -413,7 +414,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 void AsmPrinter::EmitFunctionHeader() {
   // Print out constants referenced by the function
   EmitConstantPool();
-  
+
   // Print the 'header' of function.
   const Function *F = MF->getFunction();
 
@@ -435,7 +436,7 @@ void AsmPrinter::EmitFunctionHeader() {
   // Emit the CurrentFnSym.  This is a virtual function to allow targets to
   // do their wild and crazy things as required.
   EmitFunctionEntryLabel();
-  
+
   // If the function had address-taken blocks that got deleted, then we have
   // references to the dangling symbols.  Emit them at the start of the function
   // so that we don't get references to undefined symbols.
@@ -445,17 +446,17 @@ void AsmPrinter::EmitFunctionHeader() {
     OutStreamer.AddComment("Address taken block that was later removed");
     OutStreamer.EmitLabel(DeadBlockSyms[i]);
   }
-  
+
   // Add some workaround for linkonce linkage on Cygwin\MinGW.
   if (MAI->getLinkOnceDirective() != 0 &&
       (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
     // FIXME: What is this?
-    MCSymbol *FakeStub = 
+    MCSymbol *FakeStub =
       OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
                                    CurrentFnSym->getName());
     OutStreamer.EmitLabel(FakeStub);
   }
-  
+
   // Emit pre-function debug and/or EH information.
   if (DE) {
     NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -480,44 +481,16 @@ void AsmPrinter::EmitFunctionEntryLabel() {
 }
 
 
-static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, 
-                         raw_ostream &CommentOS) {
-  const LLVMContext &Ctx = MF->getFunction()->getContext();
-  if (!DL.isUnknown()) {          // Print source line info.
-    DIScope Scope(DL.getScope(Ctx));
-    // Omit the directory, because it's likely to be long and uninteresting.
-    if (Scope.Verify())
-      CommentOS << Scope.getFilename();
-    else
-      CommentOS << "<unknown>";
-    CommentOS << ':' << DL.getLine();
-    if (DL.getCol() != 0)
-      CommentOS << ':' << DL.getCol();
-    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
-    if (!InlinedAtDL.isUnknown()) {
-      CommentOS << "[ ";
-      EmitDebugLoc(InlinedAtDL, MF, CommentOS);
-      CommentOS << " ]";
-    }
-  }
-}
-
 /// EmitComments - Pretty-print comments for instructions.
 static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getParent()->getParent();
   const TargetMachine &TM = MF->getTarget();
-  
-  DebugLoc DL = MI.getDebugLoc();
-  if (!DL.isUnknown()) {          // Print source line info.
-    EmitDebugLoc(DL, MF, CommentOS);
-    CommentOS << '\n';
-  }
-  
+
   // Check for spills and reloads
   int FI;
-  
+
   const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
-  
+
   // We assume a single instruction only has a spill or reload, not
   // both.
   const MachineMemOperand *MMO;
@@ -538,7 +511,7 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
     if (FrameInfo->isSpillSlotObjectIndex(FI))
       CommentOS << MMO->getSize() << "-byte Folded Spill\n";
   }
-  
+
   // Check for spill-induced copies
   if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
     CommentOS << " Reload Reuse\n";
@@ -612,21 +585,61 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
     }
     OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
   }
-  
+
   OS << '+' << MI->getOperand(1).getImm();
   // NOTE: Want this comment at start of line, don't emit with AddComment.
   AP.OutStreamer.EmitRawText(OS.str());
   return true;
 }
 
+bool AsmPrinter::needsCFIMoves() {
+  if (UnwindTablesMandatory)
+    return true;
+
+  if (MMI->hasDebugInfo() && !MAI->doesDwarfRequireFrameSection())
+    return true;
+
+  if (MF->getFunction()->doesNotThrow())
+    return false;
+
+  return true;
+}
+
+void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
+  MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+
+  if (MAI->doesDwarfRequireFrameSection() ||
+      MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+    OutStreamer.EmitLabel(Label);
+
+  if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+    return;
+
+  if (!needsCFIMoves())
+    return;
+
+  MachineModuleInfo &MMI = MF->getMMI();
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+  bool FoundOne = false;
+  (void)FoundOne;
+  for (std::vector<MachineMove>::iterator I = Moves.begin(),
+         E = Moves.end(); I != E; ++I) {
+    if (I->getLabel() == Label) {
+      EmitCFIFrameMove(*I);
+      FoundOne = true;
+    }
+  }
+  assert(FoundOne);
+}
+
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
 void AsmPrinter::EmitFunctionBody() {
   // Emit target-specific gunk before the function body.
   EmitFunctionBodyStart();
-  
+
   bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
-  
+
   // Print out code for the function.
   bool HasAnyRealCode = false;
   const MachineInstr *LastMI = 0;
@@ -649,12 +662,15 @@ void AsmPrinter::EmitFunctionBody() {
         NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
         DD->beginInstruction(II);
       }
-      
+
       if (isVerbose())
         EmitComments(*II, OutStreamer.GetCommentOS());
 
       switch (II->getOpcode()) {
       case TargetOpcode::PROLOG_LABEL:
+        emitPrologLabel(*II);
+        break;
+
       case TargetOpcode::EH_LABEL:
       case TargetOpcode::GC_LABEL:
         OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
@@ -675,10 +691,13 @@ void AsmPrinter::EmitFunctionBody() {
         if (isVerbose()) EmitKill(II, *this);
         break;
       default:
+        if (!TM.hasMCUseLoc())
+          MCLineEntry::Make(&OutStreamer, getCurrentSection());
+
         EmitInstruction(II);
         break;
       }
-      
+
       if (ShouldPrintDebugScopes) {
         NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
         DD->endInstruction(II);
@@ -705,10 +724,10 @@ void AsmPrinter::EmitFunctionBody() {
     } else  // Target not mc-ized yet.
       OutStreamer.EmitRawText(StringRef("\tnop\n"));
   }
-  
+
   // Emit target-specific gunk after the function body.
   EmitFunctionBodyEnd();
-  
+
   // If the target wants a .size directive for the size of the function, emit
   // it.
   if (MAI->hasDotTypeDotSizeDirective()) {
@@ -716,14 +735,14 @@ void AsmPrinter::EmitFunctionBody() {
     // difference between the function label and the temp label.
     MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
     OutStreamer.EmitLabel(FnEndLabel);
-    
+
     const MCExpr *SizeExp =
       MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
                               MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
                               OutContext);
     OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
   }
-  
+
   // Emit post-function debug information.
   if (DD) {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -734,20 +753,71 @@ void AsmPrinter::EmitFunctionBody() {
     DE->EndFunction();
   }
   MMI->EndFunction();
-  
+
   // Print out jump tables referenced by the function.
   EmitJumpTableInfo();
-  
+
   OutStreamer.AddBlankLine();
 }
 
 /// getDebugValueLocation - Get location information encoded by DBG_VALUE
 /// operands.
-MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+MachineLocation AsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
   // Target specific DBG_VALUE instructions are handled by each target.
   return MachineLocation();
 }
 
+/// getDwarfRegOpSize - get size required to emit given machine location using
+/// dwarf encoding.
+unsigned AsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  unsigned DWReg = RI->getDwarfRegNum(MLoc.getReg(), false);
+  if (int Offset = MLoc.getOffset()) {
+    // If the value is at a certain offset from frame register then
+    // use DW_OP_breg.
+    if (DWReg < 32)
+      return 1 + MCAsmInfo::getSLEB128Size(Offset);
+    else
+      return 1 + MCAsmInfo::getULEB128Size(MLoc.getReg()) 
+        + MCAsmInfo::getSLEB128Size(Offset);
+  }
+  if (DWReg < 32)
+    return 1;
+
+  return 1 + MCAsmInfo::getULEB128Size(DWReg);
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  unsigned Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+  if (int Offset =  MLoc.getOffset()) {
+    if (Reg < 32) {
+      OutStreamer.AddComment(
+        dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+      EmitInt8(dwarf::DW_OP_breg0 + Reg);
+    } else {
+      OutStreamer.AddComment("DW_OP_bregx");
+      EmitInt8(dwarf::DW_OP_bregx);
+      OutStreamer.AddComment(Twine(Reg));
+      EmitULEB128(Reg);
+    }
+    EmitSLEB128(Offset);
+  } else {
+    if (Reg < 32) {
+      OutStreamer.AddComment(
+        dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+      EmitInt8(dwarf::DW_OP_reg0 + Reg);
+    } else {
+      OutStreamer.AddComment("DW_OP_regx");
+      EmitInt8(dwarf::DW_OP_regx);
+      OutStreamer.AddComment(Twine(Reg));
+      EmitULEB128(Reg);
+    }
+  }
+}
+
 bool AsmPrinter::doFinalization(Module &M) {
   // Emit global variables.
   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
@@ -782,7 +852,7 @@ bool AsmPrinter::doFinalization(Module &M) {
     }
     delete DD; DD = 0;
   }
-  
+
   // If the target wants to know about weak references, print them all.
   if (MAI->getWeakRefDirective()) {
     // FIXME: This is not lazy, it would be nice to only print weak references
@@ -796,7 +866,7 @@ bool AsmPrinter::doFinalization(Module &M) {
       if (!I->hasExternalWeakLinkage()) continue;
       OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
     }
-    
+
     for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
       OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
@@ -822,7 +892,7 @@ bool AsmPrinter::doFinalization(Module &M) {
       EmitVisibility(Name, I->getVisibility());
 
       // Emit the directives as assignments aka .set:
-      OutStreamer.EmitAssignment(Name, 
+      OutStreamer.EmitAssignment(Name,
                                  MCSymbolRefExpr::Create(Target, OutContext));
     }
   }
@@ -839,14 +909,14 @@ bool AsmPrinter::doFinalization(Module &M) {
   if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
     if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
       OutStreamer.SwitchSection(S);
-  
+
   // Allow the target to emit any magic that it wants at the end of the file,
   // after everything else has gone out.
   EmitEndOfAsmFile(M);
-  
+
   delete Mang; Mang = 0;
   MMI = 0;
-  
+
   OutStreamer.Finish();
   return false;
 }
@@ -886,7 +956,7 @@ void AsmPrinter::EmitConstantPool() {
   for (unsigned i = 0, e = CP.size(); i != e; ++i) {
     const MachineConstantPoolEntry &CPE = CP[i];
     unsigned Align = CPE.getAlignment();
-    
+
     SectionKind Kind;
     switch (CPE.getRelocationInfo()) {
     default: llvm_unreachable("Unknown section kind");
@@ -904,7 +974,7 @@ void AsmPrinter::EmitConstantPool() {
     }
 
     const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
-    
+
     // The number of sections are small, just do a linear search from the
     // last section to the first.
     bool Found = false;
@@ -953,7 +1023,7 @@ void AsmPrinter::EmitConstantPool() {
 }
 
 /// EmitJumpTableInfo - Print assembly representations of the jump tables used
-/// by the current function to the current output stream.  
+/// by the current function to the current output stream.
 ///
 void AsmPrinter::EmitJumpTableInfo() {
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -962,7 +1032,7 @@ void AsmPrinter::EmitJumpTableInfo() {
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
 
-  // Pick the directive to use to print the jump table entries, and switch to 
+  // Pick the directive to use to print the jump table entries, and switch to
   // the appropriate section.
   const Function *F = MF->getFunction();
   bool JTInDiffSection = false;
@@ -978,18 +1048,18 @@ void AsmPrinter::EmitJumpTableInfo() {
     OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
   } else {
     // Otherwise, drop it in the readonly section.
-    const MCSection *ReadOnlySection = 
+    const MCSection *ReadOnlySection =
       getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
     OutStreamer.SwitchSection(ReadOnlySection);
     JTInDiffSection = true;
   }
 
   EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
-  
+
   for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
-    
-    // If this jump table was deleted, ignore it. 
+
+    // If this jump table was deleted, ignore it.
     if (JTBBs.empty()) continue;
 
     // For the EK_LabelDifference32 entry, if the target supports .set, emit a
@@ -1003,15 +1073,15 @@ void AsmPrinter::EmitJumpTableInfo() {
       for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
         const MachineBasicBlock *MBB = JTBBs[ii];
         if (!EmittedSets.insert(MBB)) continue;
-        
+
         // .set LJTSet, LBB32-base
         const MCExpr *LHS =
           MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
         OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
                                 MCBinaryExpr::CreateSub(LHS, Base, OutContext));
       }
-    }          
-    
+    }
+
     // On some targets (e.g. Darwin) we want to emit two consecutive labels
     // before each jump table.  The first label is never referenced, but tells
     // the assembler and linker the extents of the jump table object.  The
@@ -1064,8 +1134,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
     // If the .set directive is supported, this is emitted as:
     //      .set L4_5_set_123, LBB123 - LJTI1_2
     //      .word L4_5_set_123
-    
-    // If we have emitted set directives for the jump table entries, print 
+
+    // If we have emitted set directives for the jump table entries, print
     // them rather than the entries themselves.  If we're emitting PIC, then
     // emit the table entries as differences between two text section labels.
     if (MAI->hasSetDirective()) {
@@ -1081,9 +1151,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
     break;
   }
   }
-  
+
   assert(Value && "Unknown entry kind!");
- 
+
   unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
   OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
 }
@@ -1103,18 +1173,18 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
   if (GV->getSection() == "llvm.metadata" ||
       GV->hasAvailableExternallyLinkage())
     return true;
-  
+
   if (!GV->hasAppendingLinkage()) return false;
 
   assert(GV->hasInitializer() && "Not a special LLVM global!");
-  
+
   const TargetData *TD = TM.getTargetData();
   unsigned Align = Log2_32(TD->getPointerPrefAlignment());
   if (GV->getName() == "llvm.global_ctors") {
     OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
     EmitAlignment(Align);
     EmitXXStructorList(GV->getInitializer());
-    
+
     if (TM.getRelocationModel() == Reloc::Static &&
         MAI->hasStaticCtorDtorReferenceInStaticMode()) {
       StringRef Sym(".constructors_used");
@@ -1122,8 +1192,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
                                       MCSA_Reference);
     }
     return true;
-  } 
-  
+  }
+
   if (GV->getName() == "llvm.global_dtors") {
     OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
     EmitAlignment(Align);
@@ -1137,7 +1207,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
     }
     return true;
   }
-  
+
   return false;
 }
 
@@ -1148,7 +1218,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
   // Should be an array of 'i8*'.
   ConstantArray *InitList = dyn_cast<ConstantArray>(List);
   if (InitList == 0) return;
-  
+
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
@@ -1157,7 +1227,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
   }
 }
 
-/// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the 
+/// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the
 /// function pointers, ignoring the init priority.
 void AsmPrinter::EmitXXStructorList(Constant *List) {
   // Should be an array of '{ int, void ()* }' structs.  The first value is the
@@ -1203,11 +1273,11 @@ void AsmPrinter::EmitInt32(int Value) const {
 void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
                                      unsigned Size) const {
   // Get the Hi-Lo expression.
-  const MCExpr *Diff = 
+  const MCExpr *Diff =
     MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
                             MCSymbolRefExpr::Create(Lo, OutContext),
                             OutContext);
-  
+
   if (!MAI->hasSetDirective()) {
     OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
     return;
@@ -1219,27 +1289,27 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
   OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
 }
 
-/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" 
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
 /// where the size in bytes of the directive is specified by Size and Hi/Lo
 /// specify the labels.  This implicitly uses .set if it is available.
 void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
-                                           const MCSymbol *Lo, unsigned Size) 
+                                           const MCSymbol *Lo, unsigned Size)
   const {
-  
+
   // Emit Hi+Offset - Lo
   // Get the Hi+Offset expression.
   const MCExpr *Plus =
-    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), 
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
                             MCConstantExpr::Create(Offset, OutContext),
                             OutContext);
-  
+
   // Get the Hi+Offset-Lo expression.
-  const MCExpr *Diff = 
+  const MCExpr *Diff =
     MCBinaryExpr::CreateSub(Plus,
                             MCSymbolRefExpr::Create(Lo, OutContext),
                             OutContext);
-  
-  if (!MAI->hasSetDirective()) 
+
+  if (!MAI->hasSetDirective())
     OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
   else {
     // Otherwise, emit with .set (aka assignment).
@@ -1249,22 +1319,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
   }
 }
 
-/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" 
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label.  This implicitly uses .set if it is available.
 void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
-                                      unsigned Size) 
+                                      unsigned Size)
   const {
-  
+
   // Emit Label+Offset
   const MCExpr *Plus =
-    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), 
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
                             MCConstantExpr::Create(Offset, OutContext),
                             OutContext);
-  
+
   OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
 }
-    
+
 
 //===----------------------------------------------------------------------===//
 
@@ -1276,9 +1346,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 //
 void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
   if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
-  
+
   if (NumBits == 0) return;   // 1-byte aligned: no need to emit alignment.
-  
+
   if (getCurrentSection()->getKind().isText())
     OutStreamer.EmitCodeAlignment(1 << NumBits);
   else
@@ -1293,25 +1363,25 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
 ///
 static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
   MCContext &Ctx = AP.OutContext;
-  
+
   if (CV->isNullValue() || isa<UndefValue>(CV))
     return MCConstantExpr::Create(0, Ctx);
 
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
-  
+
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
     return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
 
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
-  
+
   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
   if (CE == 0) {
     llvm_unreachable("Unknown constant value to lower!");
     return MCConstantExpr::Create(0, Ctx);
   }
-  
+
   switch (CE->getOpcode()) {
   default:
     // If the code isn't optimized, there may be outstanding folding
@@ -1339,21 +1409,21 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
     SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
     int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
                                          IdxVec.size());
-    
+
     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
     if (Offset == 0)
       return Base;
-    
+
     // Truncate/sext the offset to the pointer size.
     if (TD.getPointerSizeInBits() != 64) {
       int SExtAmount = 64-TD.getPointerSizeInBits();
       Offset = (Offset << SExtAmount) >> SExtAmount;
     }
-    
+
     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
                                    Ctx);
   }
-      
+
   case Instruction::Trunc:
     // We emit the value and depend on the assembler to truncate the generated
     // expression properly.  This is important for differences between
@@ -1372,7 +1442,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
                                       false/*ZExt*/);
     return LowerConstant(Op, AP);
   }
-    
+
   case Instruction::PtrToInt: {
     const TargetData &TD = *AP.TM.getTargetData();
     // Support only foldable casts to/from pointers that can be eliminated by
@@ -1394,7 +1464,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
     const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
   }
-      
+
   // The MC library also has a right-shift operator, but it isn't consistently
   // signed or unsigned between different targets.
   case Instruction::Add:
@@ -1435,7 +1505,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
       EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
     return;
   }
-  
+
   // Otherwise, it can be emitted as .ascii.
   SmallVector<char, 128> TmpVec;
   TmpVec.reserve(CA->getNumOperands());
@@ -1493,7 +1563,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
     AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
     return;
   }
-  
+
   if (CFP->getType()->isFloatTy()) {
     if (AP.isVerbose()) {
       float Val = CFP->getValueAPF().convertToFloat();
@@ -1503,7 +1573,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
     AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
     return;
   }
-  
+
   if (CFP->getType()->isX86_FP80Ty()) {
     // all long double variants are printed as hex
     // API needed to prevent premature destruction
@@ -1518,7 +1588,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
       AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
         << DoubleVal.convertToDouble() << '\n';
     }
-    
+
     if (AP.TM.getTargetData()->isBigEndian()) {
       AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
       AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
@@ -1526,14 +1596,14 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
       AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
       AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
     }
-    
+
     // Emit the tail padding for the long double.
     const TargetData &TD = *AP.TM.getTargetData();
     AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
                              TD.getTypeStoreSize(CFP->getType()), AddrSpace);
     return;
   }
-  
+
   assert(CFP->getType()->isPPC_FP128Ty() &&
          "Floating point constant type not handled");
   // All long double variants are printed as hex
@@ -1588,10 +1658,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
       return;
     }
   }
-  
+
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
     return EmitGlobalConstantArray(CVA, AddrSpace, AP);
-  
+
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
     return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
 
@@ -1603,10 +1673,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
     AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
     return;
   }
-  
+
   if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
     return EmitGlobalConstantVector(V, AddrSpace, AP);
-  
+
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
   AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
@@ -1703,7 +1773,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
   SmallString<60> NameStr;
   Mang->getNameWithPrefix(NameStr, Sym);
   return OutContext.GetOrCreateSymbol(NameStr.str());
-}  
+}
 
 
 
@@ -1740,10 +1810,10 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
   // Add loop depth information
   const MachineLoop *Loop = LI->getLoopFor(&MBB);
   if (Loop == 0) return;
-  
+
   MachineBasicBlock *Header = Loop->getHeader();
   assert(Header && "No header for loop");
-  
+
   // If this block is not a loop header, just print out what is the loop header
   // and return.
   if (Header != &MBB) {
@@ -1753,21 +1823,21 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
                               " Depth="+Twine(Loop->getLoopDepth()));
     return;
   }
-  
+
   // Otherwise, it is a loop header.  Print out information about child and
   // parent loops.
   raw_ostream &OS = AP.OutStreamer.GetCommentOS();
-  
-  PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); 
-  
+
+  PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
   OS << "=>";
   OS.indent(Loop->getLoopDepth()*2-2);
-  
+
   OS << "This ";
   if (Loop->empty())
     OS << "Inner ";
   OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
-  
+
   PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
 }
 
@@ -1788,7 +1858,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
     const BasicBlock *BB = MBB->getBasicBlock();
     if (isVerbose())
       OutStreamer.AddComment("Block address taken");
-    
+
     std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
 
     for (unsigned i = 0, e = Syms.size(); i != e; ++i)
@@ -1801,9 +1871,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
       if (const BasicBlock *BB = MBB->getBasicBlock())
         if (BB->hasName())
           OutStreamer.AddComment("%" + BB->getName());
-      
+
       EmitBasicBlockLoopComments(*MBB, LI, *this);
-      
+
       // NOTE: Want this comment at start of line, don't emit with AddComment.
       OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
                               Twine(MBB->getNumber()) + ":");
@@ -1823,7 +1893,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
 void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
                                 bool IsDefinition) const {
   MCSymbolAttr Attr = MCSA_Invalid;
-  
+
   switch (Visibility) {
   default: break;
   case GlobalValue::HiddenVisibility:
@@ -1850,23 +1920,23 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   // then nothing falls through to it.
   if (MBB->isLandingPad() || MBB->pred_empty())
     return false;
-  
+
   // If there isn't exactly one predecessor, it can't be a fall through.
   MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
   ++PI2;
   if (PI2 != MBB->pred_end())
     return false;
-  
+
   // The predecessor has to be immediately before this block.
   const MachineBasicBlock *Pred = *PI;
-  
+
   if (!Pred->isLayoutSuccessor(MBB))
     return false;
-  
+
   // If the block is completely empty, then it definitely does fall through.
   if (Pred->empty())
     return true;
-  
+
   // Otherwise, check the last instruction.
   const MachineInstr &LastInst = Pred->back();
   return !LastInst.getDesc().isBarrier();
@@ -1882,9 +1952,9 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
   gcp_map_type::iterator GCPI = GCMap.find(S);
   if (GCPI != GCMap.end())
     return GCPI->second;
-  
+
   const char *Name = S->getName().c_str();
-  
+
   for (GCMetadataPrinterRegistry::iterator
          I = GCMetadataPrinterRegistry::begin(),
          E = GCMetadataPrinterRegistry::end(); I != E; ++I)
@@ -1894,7 +1964,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
       GCMap.insert(std::make_pair(S, GMP));
       return GMP;
     }
-  
+
   report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
   return 0;
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 98a1bf2..6d1708a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
   const TargetLoweringObjectFile &TLOF = getObjFileLowering();
   
   const MCExpr *Exp =
-    TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
+    TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
   OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
 }
 
@@ -277,42 +277,42 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
   }
 }
 
-/// EmitFrameMoves - Emit frame instructions to describe the layout of the
-/// frame.
-void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
+/// EmitFrameMoves - Emit a frame instruction.
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const {
   const TargetRegisterInfo *RI = TM.getRegisterInfo();
 
-  int stackGrowth = TM.getTargetData()->getPointerSize();
-  if (TM.getFrameLowering()->getStackGrowthDirection() !=
-      TargetFrameLowering::StackGrowsUp)
-    stackGrowth *= -1;
+  const MachineLocation &Dst = Move.getDestination();
+  const MachineLocation &Src = Move.getSource();
+
+  // If advancing cfa.
+  if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+    assert(!Src.isReg() && "Machine move not supported yet.");
 
+    if (Src.getReg() == MachineLocation::VirtualFP) {
+      OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+    } else {
+      assert("Machine move not supported yet");
+      // Reg + Offset
+    }
+  } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+    assert(Dst.isReg() && "Machine move not supported yet.");
+    OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+  } else {
+    assert(!Dst.isReg() && "Machine move not supported yet.");
+    OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+                              Dst.getOffset());
+  }
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
     const MachineMove &Move = Moves[i];
     MCSymbol *Label = Move.getLabel();
     // Throw out move if the label is invalid.
     if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
 
-    const MachineLocation &Dst = Move.getDestination();
-    const MachineLocation &Src = Move.getSource();
-
-    // If advancing cfa.
-    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
-      assert(!Src.isReg() && "Machine move not supported yet.");
-
-      if (Src.getReg() == MachineLocation::VirtualFP) {
-        OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
-      } else {
-        assert("Machine move not supported yet");
-        // Reg + Offset
-      }
-    } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
-      assert(Dst.isReg() && "Machine move not supported yet.");
-      OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
-    } else {
-      assert(!Dst.isReg() && "Machine move not supported yet.");
-      OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
-                                Dst.getOffset());
-    }
+    EmitCFIFrameMove(Move);
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 306efad..d2be552 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -1,9 +1,11 @@
 add_llvm_library(LLVMAsmPrinter
+  ARMException.cpp
   AsmPrinter.cpp
   AsmPrinterDwarf.cpp
   AsmPrinterInlineAsm.cpp
   DIE.cpp
   DwarfCFIException.cpp
+  DwarfCompileUnit.cpp
   DwarfDebug.cpp
   DwarfException.cpp
   DwarfTableException.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index d56c094..7d61f1e 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -70,7 +70,6 @@ namespace llvm {
 
   public:
     DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
-    virtual ~DIEAbbrev() {}
 
     // Accessors.
     unsigned getTag() const { return Tag; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 68be2ee..dbd52c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -40,7 +41,7 @@ using namespace llvm;
 
 DwarfCFIException::DwarfCFIException(AsmPrinter *A)
   : DwarfException(A),
-    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+    shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
     {}
 
 DwarfCFIException::~DwarfCFIException() {}
@@ -51,81 +52,85 @@ void DwarfCFIException::EndModule() {
   if (!Asm->MAI->isExceptionHandlingDwarf())
     return;
 
-  if (!shouldEmitTableModule)
-    return;
-
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
   unsigned PerEncoding = TLOF.getPersonalityEncoding();
 
-  // Begin eh frame section.
-  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+  if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+    return;
 
   // Emit references to all used personality functions
+  bool AtLeastOne = false;
   const std::vector<const Function*> &Personalities = MMI->getPersonalities();
   for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i));
-    Asm->EmitReference(Personalities[i], PerEncoding);
+    if (!Personalities[i])
+      continue;
+    MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+    TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+    AtLeastOne = true;
+  }
+
+  if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
+    // This is a temporary hack to keep sections in the same order they
+    // were before. This lets us produce bit identical outputs while
+    // transitioning to CFI.
+    Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
   }
 }
 
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
-  shouldEmitTable = shouldEmitMoves = false;
+  shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
 
   // If any landing pads survive, we need an EH table.
-  shouldEmitTable = !MMI->getLandingPads().empty();
+  bool hasLandingPads = !MMI->getLandingPads().empty();
 
   // See if we need frame move info.
-  shouldEmitMoves =
-    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+  shouldEmitMoves = Asm->needsCFIMoves();
 
-  if (shouldEmitMoves || shouldEmitTable)
-    // Assumes in correct section after the entry point.
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
-                                                  Asm->getFunctionNumber()));
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+  const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
 
-  shouldEmitTableModule |= shouldEmitTable;
+  shouldEmitPersonality = hasLandingPads &&
+    PerEncoding != dwarf::DW_EH_PE_omit && Per;
 
-  if (shouldEmitMoves) {
-    const TargetFrameLowering *TFL = Asm->TM.getFrameLowering();
-    Asm->OutStreamer.EmitCFIStartProc();
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  shouldEmitLSDA = shouldEmitPersonality &&
+    LSDAEncoding != dwarf::DW_EH_PE_omit;
 
-    // Indicate locations of general callee saved registers in frame.
-    std::vector<MachineMove> Moves;
-    TFL->getInitialFrameState(Moves);
-    Asm->EmitCFIFrameMoves(Moves);
-    Asm->EmitCFIFrameMoves(MMI->getFrameMoves());
-  }
+  if (!shouldEmitPersonality && !shouldEmitMoves)
+    return;
 
-  if (!shouldEmitTable)
+  Asm->OutStreamer.EmitCFIStartProc();
+
+  // Indicate personality routine, if any.
+  if (!shouldEmitPersonality)
     return;
 
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+  Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                Asm->getFunctionNumber()));
 
   // Provide LSDA information.
-  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
-  if (LSDAEncoding != dwarf::DW_EH_PE_omit)
-    Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
-                                                    Asm->getFunctionNumber()),
-                                 LSDAEncoding);
+  if (!shouldEmitLSDA)
+    return;
 
-  // Indicate personality routine, if any.
-  unsigned PerEncoding = TLOF.getPersonalityEncoding();
-  if (PerEncoding != dwarf::DW_EH_PE_omit &&
-      MMI->getPersonalities()[MMI->getPersonalityIndex()])
-    Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality",
-                                                    MMI->getPersonalityIndex()),
-                                        PerEncoding);
+  Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+                                                  Asm->getFunctionNumber()),
+                               LSDAEncoding);
 }
 
 /// EndFunction - Gather and emit post-function exception information.
 ///
 void DwarfCFIException::EndFunction() {
-  if (!shouldEmitMoves && !shouldEmitTable) return;
+  if (!shouldEmitPersonality && !shouldEmitMoves)
+    return;
 
-  if (shouldEmitMoves)
-    Asm->OutStreamer.EmitCFIEndProc();
+  Asm->OutStreamer.EmitCFIEndProc();
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
                                                 Asm->getFunctionNumber()));
@@ -133,6 +138,6 @@ void DwarfCFIException::EndFunction() {
   // Map all labels and get rid of any dead landing pads.
   MMI->TidyLandingPads();
 
-  if (shouldEmitTable)
+  if (shouldEmitPersonality)
     EmitExceptionTable();
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 0000000..7ce0cfe
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,983 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+/// CompileUnit - Compile unit constructor.
+CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
+  : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+  DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+/// ~CompileUnit - Destructor for compile unit.
+CompileUnit::~CompileUnit() {
+  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+    DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+  DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+  return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+                          unsigned Form, uint64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(false, Integer);
+  DIEValue *Value = Integer == 1 ?
+    DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add an signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+                          unsigned Form, int64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(true, Integer);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps string reference.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
+                            StringRef String) {
+  DIEValue *Value = new (DIEValueAllocator) DIEString(String);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                           const MCSymbol *Label) {
+  DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                           const MCSymbol *Hi, const MCSymbol *Lo) {
+  DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+                              DIE *Entry) {
+  Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
+/// addBlock - Add block data.
+///
+void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+                           DIEBlock *Block) {
+  Block->ComputeSize(Asm);
+  DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+  Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
+  // Verify variable.
+  if (!V.Verify())
+    return;
+  
+  unsigned Line = V.getLineNumber();
+  if (Line == 0)
+    return;
+  unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+                                            V.getContext().getDirectory());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+  // Verify global variable.
+  if (!G.Verify())
+    return;
+
+  unsigned Line = G.getLineNumber();
+  if (Line == 0)
+    return;
+  unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(),
+                                            G.getContext().getDirectory());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+  // Verify subprogram.
+  if (!SP.Verify())
+    return;
+  // If the line number is 0, don't add it.
+  if (SP.getLineNumber() == 0)
+    return;
+
+  unsigned Line = SP.getLineNumber();
+  if (!SP.getContext().Verify())
+    return;
+  unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
+  // Verify type.
+  if (!Ty.Verify())
+    return;
+
+  unsigned Line = Ty.getLineNumber();
+  if (Line == 0 || !Ty.getContext().Verify())
+    return;
+  unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+  // Verify namespace.
+  if (!NS.Verify())
+    return;
+
+  unsigned Line = NS.getLineNumber();
+  if (Line == 0)
+    return;
+  StringRef FN = NS.getFilename();
+
+  unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a 
+/// DbgVariable based on provided MachineLocation.
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, 
+                                     MachineLocation Location) {
+  if (DV->variableHasComplexAddress())
+    addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+  else if (DV->isBlockByrefVariable())
+    addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+  else
+    addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+  if (DWReg < 32)
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+  else {
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+    addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+  }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+                                    int64_t Offset) {
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+  const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+  if (Reg == TRI->getFrameRegister(*Asm->MF))
+    // If variable offset is based in frame register then use fbreg.
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+  else if (DWReg < 32)
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+  else {
+    addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+    addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+  }
+  addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+                             const MachineLocation &Location) {
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg())
+    addRegisterOp(Block, Location.getReg());
+  else
+    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable
+/// given the extra address information encoded in the DIVariable, starting from
+/// the starting location.  Add the DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+                                    unsigned Attribute,
+                                    const MachineLocation &Location) {
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  unsigned N = DV->getNumAddrElements();
+  unsigned i = 0;
+  if (Location.isReg()) {
+    if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+      // If first address element is OpPlus then emit
+      // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+      addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+      i = 2;
+    } else
+      addRegisterOp(Block, Location.getReg());
+  }
+  else
+    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+  for (;i < N; ++i) {
+    uint64_t Element = DV->getAddrElement(i);
+    if (Element == DIBuilder::OpPlus) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+    } else if (Element == DIBuilder::OpDeref) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    } else llvm_unreachable("unknown DIBuilder Opcode");
+  }
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+   VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+   gives the variable VarName either the struct, or a pointer to the struct, as
+   its type.  This is necessary for various behind-the-scenes things the
+   compiler needs to do with by-reference variables in Blocks.
+
+   However, as far as the original *programmer* is concerned, the variable
+   should still have type 'SomeType', as originally declared.
+
+   The function getBlockByrefType dives into the __Block_byref_x_VarName
+   struct to find the original type of the variable, which is then assigned to
+   the variable's Debug Information Entry as its real type.  So far, so good.
+   However now the debugger will expect the variable VarName to have the type
+   SomeType.  So we need the location attribute for the variable to be an
+   expression that explains to the debugger how to navigate through the
+   pointers and struct to find the actual variable of type SomeType.
+
+   The following function does just that.  We start by getting
+   the "normal" location for the variable. This will be the location
+   of either the struct __Block_byref_x_VarName or the pointer to the
+   struct __Block_byref_x_VarName.
+
+   The struct will look something like:
+
+   struct __Block_byref_x_VarName {
+     ... <various fields>
+     struct __Block_byref_x_VarName *forwarding;
+     ... <various other fields>
+     SomeType VarName;
+     ... <maybe more fields>
+   };
+
+   If we are given the struct directly (as our starting point) we
+   need to tell the debugger to:
+
+   1).  Add the offset of the forwarding field.
+
+   2).  Follow that pointer to get the real __Block_byref_x_VarName
+   struct to use (the real one may have been copied onto the heap).
+
+   3).  Add the offset for the field VarName, to find the actual variable.
+
+   If we started with a pointer to the struct, then we need to
+   dereference that pointer first, before the other steps.
+   Translating this into DWARF ops, we will need to append the following
+   to the current location description for the variable:
+
+   DW_OP_deref                    -- optional, if we start with a pointer
+   DW_OP_plus_uconst <forward_fld_offset>
+   DW_OP_deref
+   DW_OP_plus_uconst <varName_fld_offset>
+
+   That is what this function does.  */
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location.  Add the DWARF information to the die.  For
+/// more information, read large comment just above here.
+///
+void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+                                       unsigned Attribute,
+                                       const MachineLocation &Location) {
+  DIType Ty = DV->getType();
+  DIType TmpTy = Ty;
+  unsigned Tag = Ty.getTag();
+  bool isPointer = false;
+
+  StringRef varName = DV->getName();
+
+  if (Tag == dwarf::DW_TAG_pointer_type) {
+    DIDerivedType DTy = DIDerivedType(Ty);
+    TmpTy = DTy.getTypeDerivedFrom();
+    isPointer = true;
+  }
+
+  DICompositeType blockStruct = DICompositeType(TmpTy);
+
+  // Find the __forwarding field and the variable field in the __Block_byref
+  // struct.
+  DIArray Fields = blockStruct.getTypeArray();
+  DIDescriptor varField = DIDescriptor();
+  DIDescriptor forwardingField = DIDescriptor();
+
+  for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Fields.getElement(i);
+    DIDerivedType DT = DIDerivedType(Element);
+    StringRef fieldName = DT.getName();
+    if (fieldName == "__forwarding")
+      forwardingField = Element;
+    else if (fieldName == varName)
+      varField = Element;
+  }
+
+  // Get the offsets for the forwarding field and the variable field.
+  unsigned forwardingFieldOffset =
+    DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+  unsigned varFieldOffset =
+    DIDerivedType(varField).getOffsetInBits() >> 3;
+
+  // Decode the original location, and use that as the start of the byref
+  // variable's location.
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg()) {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  // If we started with a pointer to the __Block_byref... struct, then
+  // the first thing we need to do is dereference the pointer (DW_OP_deref).
+  if (isPointer)
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Next add the offset for the '__forwarding' field:
+  // DW_OP_plus_uconst ForwardingFieldOffset.  Note there's no point in
+  // adding the offset if it's 0.
+  if (forwardingFieldOffset > 0) {
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+  }
+
+  // Now dereference the __forwarding field to get to the real __Block_byref
+  // struct:  DW_OP_deref.
+  addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Now that we've got the real __Block_byref... struct, add the offset
+  // for the variable's field to get to the location of the actual variable:
+  // DW_OP_plus_uconst varFieldOffset.  Again, don't add if it's 0.
+  if (varFieldOffset > 0) {
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+  }
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO) {
+  assert (MO.isImm() && "Invalid machine operand!");
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  unsigned Imm = MO.getImm();
+  addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+  assert (MO.isFPImm() && "Invalid machine operand!");
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+  // Get the raw data form of the floating point.
+  const APInt FltVal = FPImm.bitcastToAPInt();
+  const char *FltPtr = (const char*)FltVal.getRawData();
+
+  int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  int Incr = (LittleEndian ? 1 : -1);
+  int Start = (LittleEndian ? 0 : NumBytes - 1);
+  int Stop = (LittleEndian ? NumBytes : -1);
+
+  // Output the constant to DWARF one byte at a time.
+  for (; Start != Stop; Start += Incr)
+    addUInt(Block, 0, dwarf::DW_FORM_data1,
+            (unsigned char)0xFF & FltPtr[Start]);
+
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI,
+                                   bool Unsigned) {
+  if (CI->getBitWidth() <= 64) {
+    if (Unsigned)
+      addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+              CI->getZExtValue());
+    else
+      addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+              CI->getSExtValue());
+    return true;
+  }
+
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  // Get the raw data form of the large APInt.
+  const APInt Val = CI->getValue();
+  const char *Ptr = (const char*)Val.getRawData();
+
+  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  int Incr = (LittleEndian ? 1 : -1);
+  int Start = (LittleEndian ? 0 : NumBytes - 1);
+  int Stop = (LittleEndian ? NumBytes : -1);
+
+  // Output the constant to DWARF one byte at a time.
+  for (; Start != Stop; Start += Incr)
+    addUInt(Block, 0, dwarf::DW_FORM_data1,
+            (unsigned char)0xFF & Ptr[Start]);
+
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addTemplateParams - Add template parameters in buffer.
+void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+  // Add template parameters.
+  for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
+    DIDescriptor Element = TParams.getElement(i);
+    if (Element.isTemplateTypeParameter())
+      Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+                        DITemplateTypeParameter(Element)));
+    else if (Element.isTemplateValueParameter())
+      Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+                        DITemplateValueParameter(Element)));
+  }
+
+}
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
+  if (Context.isType()) {
+    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
+    ContextDIE->addChild(Die);
+  } else if (Context.isNameSpace()) {
+    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
+    ContextDIE->addChild(Die);
+  } else if (Context.isSubprogram()) {
+    DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context));
+    ContextDIE->addChild(Die);
+  } else if (DIE *ContextDIE = getDIE(Context))
+    ContextDIE->addChild(Die);
+  else
+    addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) {
+  DIE *TyDIE = getDIE(Ty);
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = new DIE(dwarf::DW_TAG_base_type);
+  insertDIE(Ty, TyDIE);
+  if (Ty.isBasicType())
+    constructTypeDIE(*TyDIE, DIBasicType(Ty));
+  else if (Ty.isCompositeType())
+    constructTypeDIE(*TyDIE, DICompositeType(Ty));
+  else {
+    assert(Ty.isDerivedType() && "Unknown kind of DIType");
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+  }
+
+  addToContextOwner(TyDIE, Ty.getContext());
+  return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty) {
+  if (!Ty.Verify())
+    return;
+
+  // Check for pre-existence.
+  DIEEntry *Entry = getDIEEntry(Ty);
+  // If it exists then use the existing value.
+  if (Entry) {
+    Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+    return;
+  }
+
+  // Construct type.
+  DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  insertDIEEntry(Ty, Entry);
+
+  Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+  // Get core information.
+  StringRef Name = BTy.getName();
+  Buffer.setTag(dwarf::DW_TAG_base_type);
+  addUInt(&Buffer, dwarf::DW_AT_encoding,  dwarf::DW_FORM_data1,
+          BTy.getEncoding());
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  uint64_t Size = BTy.getSizeInBits() >> 3;
+  addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+  // Get core information.
+  StringRef Name = DTy.getName();
+  uint64_t Size = DTy.getSizeInBits() >> 3;
+  unsigned Tag = DTy.getTag();
+
+  // FIXME - Workaround for templates.
+  if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+  Buffer.setTag(Tag);
+
+  // Map to main type, void will not have a type.
+  DIType FromTy = DTy.getTypeDerivedFrom();
+  addType(&Buffer, FromTy);
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  // Add size if non-zero (derived types might be zero-sized.)
+  if (Size)
+    addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+  // Add source line info if available and TyDesc is not a forward declaration.
+  if (!DTy.isForwardDecl())
+    addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  // Get core information.
+  StringRef Name = CTy.getName();
+
+  uint64_t Size = CTy.getSizeInBits() >> 3;
+  unsigned Tag = CTy.getTag();
+  Buffer.setTag(Tag);
+
+  switch (Tag) {
+  case dwarf::DW_TAG_vector_type:
+  case dwarf::DW_TAG_array_type:
+    constructArrayTypeDIE(Buffer, &CTy);
+    break;
+  case dwarf::DW_TAG_enumeration_type: {
+    DIArray Elements = CTy.getTypeArray();
+
+    // Add enumerators to enumeration type.
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+      DIE *ElemDie = NULL;
+      DIDescriptor Enum(Elements.getElement(i));
+      if (Enum.isEnumerator()) {
+        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+        Buffer.addChild(ElemDie);
+      }
+    }
+  }
+    break;
+  case dwarf::DW_TAG_subroutine_type: {
+    // Add return type.
+    DIArray Elements = CTy.getTypeArray();
+    DIDescriptor RTy = Elements.getElement(0);
+    addType(&Buffer, DIType(RTy));
+
+    bool isPrototyped = true;
+    // Add arguments.
+    for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+      DIDescriptor Ty = Elements.getElement(i);
+      if (Ty.isUnspecifiedParameter()) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+        Buffer.addChild(Arg);
+        isPrototyped = false;
+      } else {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        addType(Arg, DIType(Ty));
+        Buffer.addChild(Arg);
+      }
+    }
+    // Add prototype flag.
+    if (isPrototyped)
+      addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+  }
+    break;
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_class_type: {
+    // Add elements to structure type.
+    DIArray Elements = CTy.getTypeArray();
+
+    // A forward struct declared type may not have elements available.
+    unsigned N = Elements.getNumElements();
+    if (N == 0)
+      break;
+
+    // Add elements to structure type.
+    for (unsigned i = 0; i < N; ++i) {
+      DIDescriptor Element = Elements.getElement(i);
+      DIE *ElemDie = NULL;
+      if (Element.isSubprogram()) {
+        DISubprogram SP(Element);
+        ElemDie = DD->createSubprogramDIE(DISubprogram(Element));
+        if (SP.isProtected())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_protected);
+        else if (SP.isPrivate())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_private);
+        else 
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+        if (SP.isExplicit())
+          addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+      }
+      else if (Element.isVariable()) {
+        DIVariable DV(Element);
+        ElemDie = new DIE(dwarf::DW_TAG_variable);
+        addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+                  DV.getName());
+        addType(ElemDie, DV.getType());
+        addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+        addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+        addSourceLine(ElemDie, DV);
+      } else if (Element.isDerivedType())
+        ElemDie = createMemberDIE(DIDerivedType(Element));
+      else
+        continue;
+      Buffer.addChild(ElemDie);
+    }
+
+    if (CTy.isAppleBlockExtension())
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+    unsigned RLang = CTy.getRunTimeLang();
+    if (RLang)
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+              dwarf::DW_FORM_data1, RLang);
+
+    DICompositeType ContainingType = CTy.getContainingType();
+    if (DIDescriptor(ContainingType).isCompositeType())
+      addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+                  getOrCreateTypeDIE(DIType(ContainingType)));
+    else {
+      DIDescriptor Context = CTy.getContext();
+      addToContextOwner(&Buffer, Context);
+    }
+
+    if (Tag == dwarf::DW_TAG_class_type) 
+      addTemplateParams(Buffer, CTy.getTemplateParams());
+
+    break;
+  }
+  default:
+    break;
+  }
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+      || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+    {
+    // Add size if non-zero (derived types might be zero-sized.)
+    if (Size)
+      addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+    else {
+      // Add zero size if it is not a forward declaration.
+      if (CTy.isForwardDecl())
+        addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      else
+        addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+    }
+
+    // Add source line info if available.
+    if (!CTy.isForwardDecl())
+      addSourceLine(&Buffer, CTy);
+  }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+  DIE *ParamDIE = getDIE(TP);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+  addType(ParamDIE, TP.getType());
+  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+  return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+  DIE *ParamDIE = getDIE(TPV);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+  addType(ParamDIE, TPV.getType());
+  if (!TPV.getName().empty())
+    addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+  addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, 
+          TPV.getValue());
+  return ParamDIE;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+  DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+  int64_t L = SR.getLo();
+  int64_t H = SR.getHi();
+
+  // The L value defines the lower bounds which is typically zero for C/C++. The
+  // H value is the upper bounds.  Values are 64 bit.  H - L + 1 is the size
+  // of the array. If L > H then do not emit DW_AT_lower_bound and 
+  // DW_AT_upper_bound attributes. If L is zero and H is also zero then the
+  // array has one element and in such case do not emit lower bound.
+
+  if (L > H) {
+    Buffer.addChild(DW_Subrange);
+    return;
+  }
+  if (L)
+    addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+  addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+  Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+                                        DICompositeType *CTy) {
+  Buffer.setTag(dwarf::DW_TAG_array_type);
+  if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+    addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+  // Emit derived type.
+  addType(&Buffer, CTy->getTypeDerivedFrom());
+  DIArray Elements = CTy->getTypeArray();
+
+  // Get an anonymous type for index type.
+  DIE *IdxTy = getIndexTyDie();
+  if (!IdxTy) {
+    // Construct an anonymous type for index type.
+    IdxTy = new DIE(dwarf::DW_TAG_base_type);
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+            dwarf::DW_ATE_signed);
+    addDie(IdxTy);
+    setIndexTyDie(IdxTy);
+  }
+
+  // Add subranges to array type.
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Elements.getElement(i);
+    if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+      constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+  }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+  DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+  StringRef Name = ETy.getName();
+  addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  int64_t Value = ETy.getEnumValue();
+  addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+  return Enumerator;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+  DIE *MemberDie = new DIE(DT.getTag());
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  addType(MemberDie, DT.getTypeDerivedFrom());
+
+  addSourceLine(MemberDie, DT);
+
+  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+  uint64_t Size = DT.getSizeInBits();
+  uint64_t FieldSize = DT.getOriginalTypeSize();
+
+  if (Size != FieldSize) {
+    // Handle bitfield.
+    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+    uint64_t Offset = DT.getOffsetInBits();
+    uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+    uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+    uint64_t FieldOffset = (HiMark - FieldSize);
+    Offset -= FieldOffset;
+
+    // Maybe we need to work from the other end.
+    if (Asm->getTargetData().isLittleEndian())
+      Offset = FieldSize - (Offset + Size);
+    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+    // Here WD_AT_data_member_location points to the anonymous
+    // field that includes this bit field.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+  } else
+    // This is not a bitfield.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+  if (DT.getTag() == dwarf::DW_TAG_inheritance
+      && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at fixed offset. Use following
+    // expression to extract appropriate offset from vtable.
+    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+             VBaseLocationDie);
+  } else
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+  if (DT.isProtected())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_private);
+  // Otherwise C++ member and base classes are considered public.
+  else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+            dwarf::DW_VIRTUALITY_virtual);
+
+  // Objective-C properties.
+  StringRef PropertyName = DT.getObjCPropertyName();
+  if (!PropertyName.empty()) {
+    addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+              PropertyName);
+    StringRef GetterName = DT.getObjCPropertyGetterName();
+    if (!GetterName.empty())
+      addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+                dwarf::DW_FORM_string, GetterName);
+    StringRef SetterName = DT.getObjCPropertySetterName();
+    if (!SetterName.empty())
+      addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+                dwarf::DW_FORM_string, SetterName);
+    unsigned PropertyAttributes = 0;
+    if (DT.isReadOnlyObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+    if (DT.isReadWriteObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+    if (DT.isAssignObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+    if (DT.isRetainObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+    if (DT.isCopyObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+    if (DT.isNonAtomicObjCProperty())
+      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+    if (PropertyAttributes)
+      addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0, 
+              PropertyAttributes);
+  }
+  return MemberDie;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..f4f6fb8
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,282 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information associate
+/// with a source file.
+class CompileUnit {
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+
+  /// Die - Compile unit debug information entry.
+  ///
+  const OwningPtr<DIE> CUDie;
+
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  DwarfDebug *DD;
+
+  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> Globals;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
+  ///
+  StringMap<DIE*> GlobalTypes;
+
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
+
+public:
+  CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+  ~CompileUnit();
+
+  // Accessors.
+  unsigned getID()                  const { return ID; }
+  DIE* getCUDie()                   const { return CUDie.get(); }
+  const StringMap<DIE*> &getGlobals()     const { return Globals; }
+  const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+  /// addGlobal - Add a new global entity to the compile unit.
+  ///
+  void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// addGlobalType - Add a new global type to the compile unit.
+  ///
+  void addGlobalType(StringRef Name, DIE *Die) {
+    GlobalTypes[Name] = Die;
+  }
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+  DIEBlock *getDIEBlock() { 
+    return new (DIEValueAllocator) DIEBlock();
+  }
+
+  /// insertDIE - Insert DIE into the map.
+  void insertDIE(const MDNode *N, DIE *D) {
+    MDNodeToDieMap.insert(std::make_pair(N, D));
+  }
+
+  /// getDIEEntry - Returns the debug information entry for the speciefied
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) {
+    DenseMap<const MDNode *, DIEEntry *>::iterator I =
+      MDNodeToDIEEntryMap.find(N);
+    if (I == MDNodeToDIEEntryMap.end())
+      return NULL;
+    return I->second;
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) {
+    this->CUDie->addChild(Buffer);
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() {
+    return IndexTyDie;
+  }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) {
+    IndexTyDie = D;
+  }
+public:
+
+  /// addUInt - Add an unsigned integer attribute data and value.
+  ///
+  void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add an signed integer attribute data and value.
+  ///
+  void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+  /// addString - Add a string attribute data and value.
+  ///
+  void addString(DIE *Die, unsigned Attribute, unsigned Form,
+                 const StringRef Str);
+
+  /// addLabel - Add a Dwarf label attribute data and value.
+  ///
+  void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                const MCSymbol *Label);
+
+  /// addDelta - Add a label delta attribute data and value.
+  ///
+  void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                const MCSymbol *Hi, const MCSymbol *Lo);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  ///
+  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+  
+  /// addBlock - Add block data.
+  ///
+  void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+  /// addSourceLine - Add location information to specified debug information
+  /// entry.
+  void addSourceLine(DIE *Die, DIVariable V);
+  void addSourceLine(DIE *Die, DIGlobalVariable G);
+  void addSourceLine(DIE *Die, DISubprogram SP);
+  void addSourceLine(DIE *Die, DIType Ty);
+  void addSourceLine(DIE *Die, DINameSpace NS);
+
+  /// addAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void addAddress(DIE *Die, unsigned Attribute,
+                  const MachineLocation &Location);
+
+  /// addConstantValue - Add constant value entry in variable DIE.
+  bool addConstantValue(DIE *Die, const MachineOperand &MO);
+  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+
+  /// addConstantFPValue - Add constant value entry in variable DIE.
+  bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+
+  /// addTemplateParams - Add template parameters in buffer.
+  void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+  /// addRegisterOp - Add register operand.
+  void addRegisterOp(DIE *TheDie, unsigned Reg);
+
+  /// addRegisterOffset - Add register offset.
+  void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+
+  /// addComplexAddress - Start with the address based on the location provided,
+  /// and generate the DWARF information necessary to find the actual variable
+  /// (navigating the extra location information encoded in the type) based on
+  /// the starting location.  Add the DWARF information to the die.
+  ///
+  void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+                         const MachineLocation &Location);
+
+  // FIXME: Should be reformulated in terms of addComplexAddress.
+  /// addBlockByrefAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual Block variable (navigating the Block struct) based on the
+  /// starting location.  Add the DWARF information to the die.  Obsolete,
+  /// please use addComplexAddress instead.
+  ///
+  void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location);
+
+  /// addVariableAddress - Add DW_AT_location attribute for a 
+  /// DbgVariable based on provided MachineLocation.
+  void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+  /// addToContextOwner - Add Die into the list of its context owner's children.
+  void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+  /// addType - Add a new type attribute to the specified entity.
+  void addType(DIE *Entity, DIType Ty);
+
+  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+  DIE *getOrCreateNameSpace(DINameSpace NS);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(DIType Ty);
+
+  /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
+  /// for the given DITemplateTypeParameter.
+  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
+  /// for the given DITemplateValueParameter.
+  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *createDIEEntry(DIE *Entry);
+
+  void addPubTypes(DISubprogram SP);
+
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIBasicType BTy);
+
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIDerivedType DTy);
+
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(DIE &Buffer,
+                        DICompositeType CTy);
+
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(DIE &Buffer, 
+                             DICompositeType *CTy);
+
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+  /// createMemberDIE - Create new member DIE.
+  DIE *createMemberDIE(DIDerivedType DT);
+
+private:
+
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+  DIEInteger *DIEIntegerOne;
+};
+
+} // end llvm namespace
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 780fa40..26da800 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "dwarfdebug"
 #include "DwarfDebug.h"
 #include "DIE.h"
+#include "DwarfCompileUnit.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
@@ -52,7 +53,7 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
      cl::desc("Disable debug info printing"));
 
 static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
-     cl::desc("Make an absense of debug location information explicit."),
+     cl::desc("Make an absence of debug location information explicit."),
      cl::init(false));
 
 #ifndef NDEBUG
@@ -72,189 +73,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
 
 namespace llvm {
 
-//===----------------------------------------------------------------------===//
-/// CompileUnit - This dwarf writer support class manages information associate
-/// with a source file.
-class CompileUnit {
-  /// ID - File identifier for source.
-  ///
-  unsigned ID;
-
-  /// Die - Compile unit debug information entry.
-  ///
-  const OwningPtr<DIE> CUDie;
-
-  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
-  DIE *IndexTyDie;
-
-  /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton
-  /// variables to debug information entries.
-  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
-
-  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton
-  /// descriptors to debug information entries using a DIEEntry proxy.
-  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
-
-  /// Globals - A map of globally visible named entities for this unit.
-  ///
-  StringMap<DIE*> Globals;
-
-  /// GlobalTypes - A map of globally visible types for this unit.
-  ///
-  StringMap<DIE*> GlobalTypes;
-
-public:
-  CompileUnit(unsigned I, DIE *D)
-    : ID(I), CUDie(D), IndexTyDie(0) {}
-
-  // Accessors.
-  unsigned getID()                  const { return ID; }
-  DIE* getCUDie()                   const { return CUDie.get(); }
-  const StringMap<DIE*> &getGlobals()     const { return Globals; }
-  const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
-
-  /// hasContent - Return true if this compile unit has something to write out.
-  ///
-  bool hasContent() const { return !CUDie->getChildren().empty(); }
-
-  /// addGlobal - Add a new global entity to the compile unit.
-  ///
-  void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
-
-  /// addGlobalType - Add a new global type to the compile unit.
-  ///
-  void addGlobalType(StringRef Name, DIE *Die) {
-    GlobalTypes[Name] = Die;
-  }
-
-  /// getDIE - Returns the debug information entry map slot for the
-  /// specified debug variable.
-  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
-
-  /// insertDIE - Insert DIE into the map.
-  void insertDIE(const MDNode *N, DIE *D) {
-    MDNodeToDieMap.insert(std::make_pair(N, D));
-  }
-
-  /// getDIEEntry - Returns the debug information entry for the speciefied
-  /// debug variable.
-  DIEEntry *getDIEEntry(const MDNode *N) {
-    DenseMap<const MDNode *, DIEEntry *>::iterator I =
-      MDNodeToDIEEntryMap.find(N);
-    if (I == MDNodeToDIEEntryMap.end())
-      return NULL;
-    return I->second;
-  }
-
-  /// insertDIEEntry - Insert debug information entry into the map.
-  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
-    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
-  }
-
-  /// addDie - Adds or interns the DIE to the compile unit.
-  ///
-  void addDie(DIE *Buffer) {
-    this->CUDie->addChild(Buffer);
-  }
-
-  // getIndexTyDie - Get an anonymous type for index type.
-  DIE *getIndexTyDie() {
-    return IndexTyDie;
-  }
-
-  // setIndexTyDie - Set D as anonymous type for index which can be reused
-  // later.
-  void setIndexTyDie(DIE *D) {
-    IndexTyDie = D;
-  }
-
-};
-
-//===----------------------------------------------------------------------===//
-/// DbgVariable - This class is used to track local variable information.
-///
-class DbgVariable {
-  DIVariable Var;                    // Variable Descriptor.
-  DIE *TheDIE;                       // Variable DIE.
-  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
-public:
-  // AbsVar may be NULL.
-  DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
-
-  // Accessors.
-  DIVariable getVariable()           const { return Var; }
-  void setDIE(DIE *D)                      { TheDIE = D; }
-  DIE *getDIE()                      const { return TheDIE; }
-  void setDotDebugLocOffset(unsigned O)    { DotDebugLocOffset = O; }
-  unsigned getDotDebugLocOffset()    const { return DotDebugLocOffset; }
-  StringRef getName()                const { return Var.getName(); }
-  unsigned getTag()                  const { return Var.getTag(); }
-  bool variableHasComplexAddress()   const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
-    return Var.hasComplexAddress();
-  }
-  bool isBlockByrefVariable()        const {
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
-    return Var.isBlockByrefVariable();
-  }
-  unsigned getNumAddrElements()      const { 
-    assert(Var.Verify() && "Invalid complex DbgVariable!");
-    return Var.getNumAddrElements();
-  }
-  uint64_t getAddrElement(unsigned i) const {
-    return Var.getAddrElement(i);
-  }
-  DIType getType()               const {
-    DIType Ty = Var.getType();
-    // FIXME: isBlockByrefVariable should be reformulated in terms of complex
-    // addresses instead.
-    if (Var.isBlockByrefVariable()) {
-      /* Byref variables, in Blocks, are declared by the programmer as
-         "SomeType VarName;", but the compiler creates a
-         __Block_byref_x_VarName struct, and gives the variable VarName
-         either the struct, or a pointer to the struct, as its type.  This
-         is necessary for various behind-the-scenes things the compiler
-         needs to do with by-reference variables in blocks.
-         
-         However, as far as the original *programmer* is concerned, the
-         variable should still have type 'SomeType', as originally declared.
-         
-         The following function dives into the __Block_byref_x_VarName
-         struct to find the original type of the variable.  This will be
-         passed back to the code generating the type for the Debug
-         Information Entry for the variable 'VarName'.  'VarName' will then
-         have the original type 'SomeType' in its debug information.
-         
-         The original type 'SomeType' will be the type of the field named
-         'VarName' inside the __Block_byref_x_VarName struct.
-         
-         NOTE: In order for this to not completely fail on the debugger
-         side, the Debug Information Entry for the variable VarName needs to
-         have a DW_AT_location that tells the debugger how to unwind through
-         the pointers and __Block_byref_x_VarName struct to find the actual
-         value of the variable.  The function addBlockByrefType does this.  */
-      DIType subType = Ty;
-      unsigned tag = Ty.getTag();
-      
-      if (tag == dwarf::DW_TAG_pointer_type) {
-        DIDerivedType DTy = DIDerivedType(Ty);
-        subType = DTy.getTypeDerivedFrom();
-      }
-      
-      DICompositeType blockStruct = DICompositeType(subType);
-      DIArray Elements = blockStruct.getTypeArray();
-      
-      for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
-        DIDescriptor Element = Elements.getElement(i);
-        DIDerivedType DT = DIDerivedType(Element);
-        if (getName() == DT.getName())
-          return (DT.getTypeDerivedFrom());
-      }
-      return Ty;
+DIType DbgVariable::getType()               const {
+  DIType Ty = Var.getType();
+  // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+  // addresses instead.
+  if (Var.isBlockByrefVariable()) {
+    /* Byref variables, in Blocks, are declared by the programmer as
+       "SomeType VarName;", but the compiler creates a
+       __Block_byref_x_VarName struct, and gives the variable VarName
+       either the struct, or a pointer to the struct, as its type.  This
+       is necessary for various behind-the-scenes things the compiler
+       needs to do with by-reference variables in blocks.
+       
+       However, as far as the original *programmer* is concerned, the
+       variable should still have type 'SomeType', as originally declared.
+       
+       The following function dives into the __Block_byref_x_VarName
+       struct to find the original type of the variable.  This will be
+       passed back to the code generating the type for the Debug
+       Information Entry for the variable 'VarName'.  'VarName' will then
+       have the original type 'SomeType' in its debug information.
+       
+       The original type 'SomeType' will be the type of the field named
+       'VarName' inside the __Block_byref_x_VarName struct.
+       
+       NOTE: In order for this to not completely fail on the debugger
+       side, the Debug Information Entry for the variable VarName needs to
+       have a DW_AT_location that tells the debugger how to unwind through
+       the pointers and __Block_byref_x_VarName struct to find the actual
+       value of the variable.  The function addBlockByrefType does this.  */
+    DIType subType = Ty;
+    unsigned tag = Ty.getTag();
+    
+    if (tag == dwarf::DW_TAG_pointer_type) {
+      DIDerivedType DTy = DIDerivedType(Ty);
+      subType = DTy.getTypeDerivedFrom();
+    }
+    
+    DICompositeType blockStruct = DICompositeType(subType);
+    DIArray Elements = blockStruct.getTypeArray();
+    
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+      DIDescriptor Element = Elements.getElement(i);
+      DIDerivedType DT = DIDerivedType(Element);
+      if (getName() == DT.getName())
+        return (DT.getTypeDerivedFrom());
     }
     return Ty;
   }
-};
+  return Ty;
+}
 
 //===----------------------------------------------------------------------===//
 /// DbgRange - This is used to track range of instructions with identical
@@ -396,15 +264,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfStrSectionSym = TextSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
-  DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     beginModule(M);
   }
 }
 DwarfDebug::~DwarfDebug() {
-  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
-    DIEBlocks[j]->~DIEBlock();
 }
 
 MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
@@ -439,852 +304,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
   }
 }
 
-/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
-/// information entry.
-DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
-  DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
-  return Value;
-}
-
-/// addUInt - Add an unsigned integer attribute data and value.
-///
-void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
-                         unsigned Form, uint64_t Integer) {
-  if (!Form) Form = DIEInteger::BestForm(false, Integer);
-  DIEValue *Value = Integer == 1 ?
-    DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addSInt - Add an signed integer attribute data and value.
-///
-void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
-                         unsigned Form, int64_t Integer) {
-  if (!Form) Form = DIEInteger::BestForm(true, Integer);
-  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
-void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
-                           StringRef String) {
-  DIEValue *Value = new (DIEValueAllocator) DIEString(String);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addLabel - Add a Dwarf label attribute data and value.
-///
-void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                          const MCSymbol *Label) {
-  DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addDelta - Add a label delta attribute data and value.
-///
-void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
-                          const MCSymbol *Hi, const MCSymbol *Lo) {
-  DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
-  Die->addValue(Attribute, Form, Value);
-}
-
-/// addDIEEntry - Add a DIE attribute data and value.
-///
-void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
-                             DIE *Entry) {
-  Die->addValue(Attribute, Form, createDIEEntry(Entry));
-}
-
-
-/// addBlock - Add block data.
-///
-void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
-                          DIEBlock *Block) {
-  Block->ComputeSize(Asm);
-  DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
-  Die->addValue(Attribute, Block->BestForm(), Block);
-}
-
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
-  // Verify variable.
-  if (!V.Verify())
-    return;
-
-  unsigned Line = V.getLineNumber();
-  if (Line == 0)
-    return;
-  unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
-}
-
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
-  // Verify global variable.
-  if (!G.Verify())
-    return;
-
-  unsigned Line = G.getLineNumber();
-  if (Line == 0)
-    return;
-  unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
-}
-
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
-  // Verify subprogram.
-  if (!SP.Verify())
-    return;
-  // If the line number is 0, don't add it.
-  if (SP.getLineNumber() == 0)
-    return;
-
-  unsigned Line = SP.getLineNumber();
-  if (!SP.getContext().Verify())
-    return;
-  unsigned FileID = GetOrCreateSourceID(SP.getFilename());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
-}
-
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
-  // Verify type.
-  if (!Ty.Verify())
-    return;
-
-  unsigned Line = Ty.getLineNumber();
-  if (Line == 0 || !Ty.getContext().Verify())
-    return;
-  unsigned FileID = GetOrCreateSourceID(Ty.getFilename());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
-}
-
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
-  // Verify namespace.
-  if (!NS.Verify())
-    return;
-
-  unsigned Line = NS.getLineNumber();
-  if (Line == 0)
-    return;
-  StringRef FN = NS.getFilename();
-
-  unsigned FileID = GetOrCreateSourceID(FN);
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
-}
-
-/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
-/// on provided frame index.
-void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
-  MachineLocation Location;
-  unsigned FrameReg;
-  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
-  int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
-  Location.set(FrameReg, Offset);
-
-  if (DV->variableHasComplexAddress())
-    addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
-  else if (DV->isBlockByrefVariable())
-    addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
-  else
-    addAddress(Die, dwarf::DW_AT_location, Location);
-}
-
-/// addComplexAddress - Start with the address based on the location provided,
-/// and generate the DWARF information necessary to find the actual variable
-/// given the extra address information encoded in the DIVariable, starting from
-/// the starting location.  Add the DWARF information to the die.
-///
-void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
-                                   unsigned Attribute,
-                                   const MachineLocation &Location) {
-  DIType Ty = DV->getType();
-
-  // Decode the original location, and use that as the start of the byref
-  // variable's location.
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-
-  if (Location.isReg()) {
-    if (Reg < 32) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
-    } else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-  } else {
-    if (Reg < 32)
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-    else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-
-    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
-  }
-
-  for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
-    uint64_t Element = DV->getAddrElement(i);
-
-    if (Element == DIBuilder::OpPlus) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
-    } else if (Element == DIBuilder::OpDeref) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-    } else llvm_unreachable("unknown DIBuilder Opcode");
-  }
-
-  // Now attach the location information to the DIE.
-  addBlock(Die, Attribute, 0, Block);
-}
-
-/* Byref variables, in Blocks, are declared by the programmer as "SomeType
-   VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
-   gives the variable VarName either the struct, or a pointer to the struct, as
-   its type.  This is necessary for various behind-the-scenes things the
-   compiler needs to do with by-reference variables in Blocks.
-
-   However, as far as the original *programmer* is concerned, the variable
-   should still have type 'SomeType', as originally declared.
-
-   The function getBlockByrefType dives into the __Block_byref_x_VarName
-   struct to find the original type of the variable, which is then assigned to
-   the variable's Debug Information Entry as its real type.  So far, so good.
-   However now the debugger will expect the variable VarName to have the type
-   SomeType.  So we need the location attribute for the variable to be an
-   expression that explains to the debugger how to navigate through the
-   pointers and struct to find the actual variable of type SomeType.
-
-   The following function does just that.  We start by getting
-   the "normal" location for the variable. This will be the location
-   of either the struct __Block_byref_x_VarName or the pointer to the
-   struct __Block_byref_x_VarName.
-
-   The struct will look something like:
-
-   struct __Block_byref_x_VarName {
-     ... <various fields>
-     struct __Block_byref_x_VarName *forwarding;
-     ... <various other fields>
-     SomeType VarName;
-     ... <maybe more fields>
-   };
-
-   If we are given the struct directly (as our starting point) we
-   need to tell the debugger to:
-
-   1).  Add the offset of the forwarding field.
-
-   2).  Follow that pointer to get the real __Block_byref_x_VarName
-   struct to use (the real one may have been copied onto the heap).
-
-   3).  Add the offset for the field VarName, to find the actual variable.
-
-   If we started with a pointer to the struct, then we need to
-   dereference that pointer first, before the other steps.
-   Translating this into DWARF ops, we will need to append the following
-   to the current location description for the variable:
-
-   DW_OP_deref                    -- optional, if we start with a pointer
-   DW_OP_plus_uconst <forward_fld_offset>
-   DW_OP_deref
-   DW_OP_plus_uconst <varName_fld_offset>
-
-   That is what this function does.  */
-
-/// addBlockByrefAddress - Start with the address based on the location
-/// provided, and generate the DWARF information necessary to find the
-/// actual Block variable (navigating the Block struct) based on the
-/// starting location.  Add the DWARF information to the die.  For
-/// more information, read large comment just above here.
-///
-void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
-                                      unsigned Attribute,
-                                      const MachineLocation &Location) {
-  DIType Ty = DV->getType();
-  DIType TmpTy = Ty;
-  unsigned Tag = Ty.getTag();
-  bool isPointer = false;
-
-  StringRef varName = DV->getName();
-
-  if (Tag == dwarf::DW_TAG_pointer_type) {
-    DIDerivedType DTy = DIDerivedType(Ty);
-    TmpTy = DTy.getTypeDerivedFrom();
-    isPointer = true;
-  }
-
-  DICompositeType blockStruct = DICompositeType(TmpTy);
-
-  // Find the __forwarding field and the variable field in the __Block_byref
-  // struct.
-  DIArray Fields = blockStruct.getTypeArray();
-  DIDescriptor varField = DIDescriptor();
-  DIDescriptor forwardingField = DIDescriptor();
-
-  for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
-    DIDescriptor Element = Fields.getElement(i);
-    DIDerivedType DT = DIDerivedType(Element);
-    StringRef fieldName = DT.getName();
-    if (fieldName == "__forwarding")
-      forwardingField = Element;
-    else if (fieldName == varName)
-      varField = Element;
-  }
-
-  // Get the offsets for the forwarding field and the variable field.
-  unsigned forwardingFieldOffset =
-    DIDerivedType(forwardingField).getOffsetInBits() >> 3;
-  unsigned varFieldOffset =
-    DIDerivedType(varField).getOffsetInBits() >> 3;
-
-  // Decode the original location, and use that as the start of the byref
-  // variable's location.
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-
-  if (Location.isReg()) {
-    if (Reg < 32)
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
-    else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-  } else {
-    if (Reg < 32)
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-    else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-
-    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
-  }
-
-  // If we started with a pointer to the __Block_byref... struct, then
-  // the first thing we need to do is dereference the pointer (DW_OP_deref).
-  if (isPointer)
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-
-  // Next add the offset for the '__forwarding' field:
-  // DW_OP_plus_uconst ForwardingFieldOffset.  Note there's no point in
-  // adding the offset if it's 0.
-  if (forwardingFieldOffset > 0) {
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
-  }
-
-  // Now dereference the __forwarding field to get to the real __Block_byref
-  // struct:  DW_OP_deref.
-  addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-
-  // Now that we've got the real __Block_byref... struct, add the offset
-  // for the variable's field to get to the location of the actual variable:
-  // DW_OP_plus_uconst varFieldOffset.  Again, don't add if it's 0.
-  if (varFieldOffset > 0) {
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
-  }
-
-  // Now attach the location information to the DIE.
-  addBlock(Die, Attribute, 0, Block);
-}
-
-/// addAddress - Add an address attribute to a die based on the location
-/// provided.
-void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
-                            const MachineLocation &Location) {
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-
-  if (RI->getFrameRegister(*Asm->MF) == Location.getReg()
-      && Location.getOffset()) {
-    // If variable offset is based in frame register then use fbreg.
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
-    addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
-    addBlock(Die, Attribute, 0, Block);
-    return;
-  }
-
-  if (Location.isReg()) {
-    if (Reg < 32) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
-    } else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-  } else {
-    if (Reg < 32) {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-    } else {
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
-    }
-
-    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
-  }
-
-  addBlock(Die, Attribute, 0, Block);
-}
-
-/// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) {
-  assert (MO.isReg() && "Invalid machine operand!");
-  if (!MO.getReg())
-    return false;
-  MachineLocation Location;
-  Location.set(MO.getReg());
-  addAddress(Die, dwarf::DW_AT_location, Location);
-  return true;
-}
-
-/// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) {
-  assert (MO.isImm() && "Invalid machine operand!");
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-  unsigned Imm = MO.getImm();
-  addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
-  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  return true;
-}
-
-/// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
-  assert (MO.isFPImm() && "Invalid machine operand!");
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-  APFloat FPImm = MO.getFPImm()->getValueAPF();
-
-  // Get the raw data form of the floating point.
-  const APInt FltVal = FPImm.bitcastToAPInt();
-  const char *FltPtr = (const char*)FltVal.getRawData();
-
-  int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
-  bool LittleEndian = Asm->getTargetData().isLittleEndian();
-  int Incr = (LittleEndian ? 1 : -1);
-  int Start = (LittleEndian ? 0 : NumBytes - 1);
-  int Stop = (LittleEndian ? NumBytes : -1);
-
-  // Output the constant to DWARF one byte at a time.
-  for (; Start != Stop; Start += Incr)
-    addUInt(Block, 0, dwarf::DW_FORM_data1,
-            (unsigned char)0xFF & FltPtr[Start]);
-
-  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  return true;
-}
-
-/// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI,
-                                  bool Unsigned) {
-  if (CI->getBitWidth() <= 64) {
-    if (Unsigned)
-      addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
-              CI->getZExtValue());
-    else
-      addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
-              CI->getSExtValue());
-    return true;
-  }
-
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-
-  // Get the raw data form of the large APInt.
-  const APInt Val = CI->getValue();
-  const char *Ptr = (const char*)Val.getRawData();
-
-  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
-  bool LittleEndian = Asm->getTargetData().isLittleEndian();
-  int Incr = (LittleEndian ? 1 : -1);
-  int Start = (LittleEndian ? 0 : NumBytes - 1);
-  int Stop = (LittleEndian ? NumBytes : -1);
-
-  // Output the constant to DWARF one byte at a time.
-  for (; Start != Stop; Start += Incr)
-    addUInt(Block, 0, dwarf::DW_FORM_data1,
-            (unsigned char)0xFF & Ptr[Start]);
-
-  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  return true;
-}
-
-/// addToContextOwner - Add Die into the list of its context owner's children.
-void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
-  if (Context.isType()) {
-    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
-    ContextDIE->addChild(Die);
-  } else if (Context.isNameSpace()) {
-    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
-    ContextDIE->addChild(Die);
-  } else if (Context.isSubprogram()) {
-    DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context));
-    ContextDIE->addChild(Die);
-  } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
-    ContextDIE->addChild(Die);
-  else
-    getCompileUnit(Context)->addDie(Die);
-}
-
-/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
-/// given DIType.
-DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
-  CompileUnit *TypeCU = getCompileUnit(Ty);
-  DIE *TyDIE = TypeCU->getDIE(Ty);
-  if (TyDIE)
-    return TyDIE;
-
-  // Create new type.
-  TyDIE = new DIE(dwarf::DW_TAG_base_type);
-  TypeCU->insertDIE(Ty, TyDIE);
-  if (Ty.isBasicType())
-    constructTypeDIE(*TyDIE, DIBasicType(Ty));
-  else if (Ty.isCompositeType())
-    constructTypeDIE(*TyDIE, DICompositeType(Ty));
-  else {
-    assert(Ty.isDerivedType() && "Unknown kind of DIType");
-    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
-  }
-
-  addToContextOwner(TyDIE, Ty.getContext());
-  return TyDIE;
-}
-
-/// addType - Add a new type attribute to the specified entity.
-void DwarfDebug::addType(DIE *Entity, DIType Ty) {
-  if (!Ty.Verify())
-    return;
-
-  // Check for pre-existence.
-  CompileUnit *TypeCU = getCompileUnit(Ty);
-  DIEEntry *Entry = TypeCU->getDIEEntry(Ty);
-  // If it exists then use the existing value.
-  if (Entry) {
-    Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
-    return;
-  }
-
-  // Construct type.
-  DIE *Buffer = getOrCreateTypeDIE(Ty);
-
-  // Set up proxy.
-  Entry = createDIEEntry(Buffer);
-  TypeCU->insertDIEEntry(Ty, Entry);
-
-  Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
-}
-
-/// constructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
-  // Get core information.
-  StringRef Name = BTy.getName();
-  Buffer.setTag(dwarf::DW_TAG_base_type);
-  addUInt(&Buffer, dwarf::DW_AT_encoding,  dwarf::DW_FORM_data1,
-          BTy.getEncoding());
-
-  // Add name if not anonymous or intermediate type.
-  if (!Name.empty())
-    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-  uint64_t Size = BTy.getSizeInBits() >> 3;
-  addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
-}
-
-/// constructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
-  // Get core information.
-  StringRef Name = DTy.getName();
-  uint64_t Size = DTy.getSizeInBits() >> 3;
-  unsigned Tag = DTy.getTag();
-
-  // FIXME - Workaround for templates.
-  if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
-
-  Buffer.setTag(Tag);
-
-  // Map to main type, void will not have a type.
-  DIType FromTy = DTy.getTypeDerivedFrom();
-  addType(&Buffer, FromTy);
-
-  // Add name if not anonymous or intermediate type.
-  if (!Name.empty())
-    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  // Add size if non-zero (derived types might be zero-sized.)
-  if (Size)
-    addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
-
-  // Add source line info if available and TyDesc is not a forward declaration.
-  if (!DTy.isForwardDecl())
-    addSourceLine(&Buffer, DTy);
-}
-
-/// constructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
-  // Get core information.
-  StringRef Name = CTy.getName();
-
-  uint64_t Size = CTy.getSizeInBits() >> 3;
-  unsigned Tag = CTy.getTag();
-  Buffer.setTag(Tag);
-
-  switch (Tag) {
-  case dwarf::DW_TAG_vector_type:
-  case dwarf::DW_TAG_array_type:
-    constructArrayTypeDIE(Buffer, &CTy);
-    break;
-  case dwarf::DW_TAG_enumeration_type: {
-    DIArray Elements = CTy.getTypeArray();
-
-    // Add enumerators to enumeration type.
-    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
-      DIE *ElemDie = NULL;
-      DIDescriptor Enum(Elements.getElement(i));
-      if (Enum.isEnumerator()) {
-        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
-        Buffer.addChild(ElemDie);
-      }
-    }
-  }
-    break;
-  case dwarf::DW_TAG_subroutine_type: {
-    // Add return type.
-    DIArray Elements = CTy.getTypeArray();
-    DIDescriptor RTy = Elements.getElement(0);
-    addType(&Buffer, DIType(RTy));
-
-    bool isPrototyped = true;
-    // Add arguments.
-    for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
-      DIDescriptor Ty = Elements.getElement(i);
-      if (Ty.isUnspecifiedParameter()) {
-        DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
-        Buffer.addChild(Arg);
-        isPrototyped = false;
-      } else {
-        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(Arg, DIType(Ty));
-        Buffer.addChild(Arg);
-      }
-    }
-    // Add prototype flag.
-    if (isPrototyped)
-      addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
-  }
-    break;
-  case dwarf::DW_TAG_structure_type:
-  case dwarf::DW_TAG_union_type:
-  case dwarf::DW_TAG_class_type: {
-    // Add elements to structure type.
-    DIArray Elements = CTy.getTypeArray();
-
-    // A forward struct declared type may not have elements available.
-    unsigned N = Elements.getNumElements();
-    if (N == 0)
-      break;
-
-    // Add elements to structure type.
-    for (unsigned i = 0; i < N; ++i) {
-      DIDescriptor Element = Elements.getElement(i);
-      DIE *ElemDie = NULL;
-      if (Element.isSubprogram()) {
-        DISubprogram SP(Element);
-        ElemDie = createSubprogramDIE(DISubprogram(Element));
-        if (SP.isProtected())
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-                  dwarf::DW_ACCESS_protected);
-        else if (SP.isPrivate())
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-                  dwarf::DW_ACCESS_private);
-        else 
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-            dwarf::DW_ACCESS_public);
-        if (SP.isExplicit())
-          addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
-      }
-      else if (Element.isVariable()) {
-        DIVariable DV(Element);
-        ElemDie = new DIE(dwarf::DW_TAG_variable);
-        addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
-                  DV.getName());
-        addType(ElemDie, DV.getType());
-        addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-        addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-        addSourceLine(ElemDie, DV);
-      } else if (Element.isDerivedType())
-        ElemDie = createMemberDIE(DIDerivedType(Element));
-      else
-        continue;
-      Buffer.addChild(ElemDie);
-    }
-
-    if (CTy.isAppleBlockExtension())
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
-
-    unsigned RLang = CTy.getRunTimeLang();
-    if (RLang)
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
-              dwarf::DW_FORM_data1, RLang);
-
-    DICompositeType ContainingType = CTy.getContainingType();
-    if (DIDescriptor(ContainingType).isCompositeType())
-      addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
-                  getOrCreateTypeDIE(DIType(ContainingType)));
-    else {
-      DIDescriptor Context = CTy.getContext();
-      addToContextOwner(&Buffer, Context);
-    }
-
-    if (Tag == dwarf::DW_TAG_class_type) {
-      DIArray TParams = CTy.getTemplateParams();
-      unsigned N = TParams.getNumElements();
-      // Add template parameters.
-      for (unsigned i = 0; i < N; ++i) {
-        DIDescriptor Element = TParams.getElement(i);
-        if (Element.isTemplateTypeParameter())
-          Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
-                            DITemplateTypeParameter(Element)));
-        else if (Element.isTemplateValueParameter())
-          Buffer.addChild(getOrCreateTemplateValueParameterDIE(
-                            DITemplateValueParameter(Element)));
-      }
-    }
-    break;
-  }
-  default:
-    break;
-  }
-
-  // Add name if not anonymous or intermediate type.
-  if (!Name.empty())
-    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
-      || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
-    {
-    // Add size if non-zero (derived types might be zero-sized.)
-    if (Size)
-      addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
-    else {
-      // Add zero size if it is not a forward declaration.
-      if (CTy.isForwardDecl())
-        addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-      else
-        addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
-    }
-
-    // Add source line info if available.
-    if (!CTy.isForwardDecl())
-      addSourceLine(&Buffer, CTy);
-  }
-}
-
-/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
-/// for the given DITemplateTypeParameter.
-DIE *
-DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
-  CompileUnit *TypeCU = getCompileUnit(TP);
-  DIE *ParamDIE = TypeCU->getDIE(TP);
-  if (ParamDIE)
-    return ParamDIE;
-
-  ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
-  addType(ParamDIE, TP.getType());
-  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
-  return ParamDIE;
-}
-
-/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
-/// for the given DITemplateValueParameter.
-DIE *
-DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
-  CompileUnit *TVCU = getCompileUnit(TPV);
-  DIE *ParamDIE = TVCU->getDIE(TPV);
-  if (ParamDIE)
-    return ParamDIE;
-
-  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
-  addType(ParamDIE, TPV.getType());
-  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
-  addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, 
-          TPV.getValue());
-  return ParamDIE;
-}
-
-/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
-  int64_t L = SR.getLo();
-  int64_t H = SR.getHi();
-  DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
-
-  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
-  if (L)
-    addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
-  addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
-
-  Buffer.addChild(DW_Subrange);
-}
-
-/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
-                                       DICompositeType *CTy) {
-  Buffer.setTag(dwarf::DW_TAG_array_type);
-  if (CTy->getTag() == dwarf::DW_TAG_vector_type)
-    addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
-
-  // Emit derived type.
-  addType(&Buffer, CTy->getTypeDerivedFrom());
-  DIArray Elements = CTy->getTypeArray();
-
-  // Get an anonymous type for index type.
-  CompileUnit *TheCU = getCompileUnit(*CTy);
-  DIE *IdxTy = TheCU->getIndexTyDie();
-  if (!IdxTy) {
-    // Construct an anonymous type for index type.
-    IdxTy = new DIE(dwarf::DW_TAG_base_type);
-    addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
-    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
-            dwarf::DW_ATE_signed);
-    TheCU->addDie(IdxTy);
-    TheCU->setIndexTyDie(IdxTy);
-  }
-
-  // Add subranges to array type.
-  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
-    DIDescriptor Element = Elements.getElement(i);
-    if (Element.getTag() == dwarf::DW_TAG_subrange_type)
-      constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
-  }
-}
-
-/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
-  DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
-  StringRef Name = ETy.getName();
-  addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-  int64_t Value = ETy.getEnumValue();
-  addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
-  return Enumerator;
-}
-
 /// getRealLinkageName - If special LLVM prefix that is used to inform the asm
 /// printer to not emit usual symbol prefix before the symbol name is used then
 /// return linkage name after skipping this special LLVM prefix.
@@ -1295,84 +314,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
   return LinkageName;
 }
 
-/// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
-  DIE *MemberDie = new DIE(DT.getTag());
-  StringRef Name = DT.getName();
-  if (!Name.empty())
-    addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  addType(MemberDie, DT.getTypeDerivedFrom());
-
-  addSourceLine(MemberDie, DT);
-
-  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
-  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-
-  uint64_t Size = DT.getSizeInBits();
-  uint64_t FieldSize = DT.getOriginalTypeSize();
-
-  if (Size != FieldSize) {
-    // Handle bitfield.
-    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
-    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
-
-    uint64_t Offset = DT.getOffsetInBits();
-    uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
-    uint64_t HiMark = (Offset + FieldSize) & AlignMask;
-    uint64_t FieldOffset = (HiMark - FieldSize);
-    Offset -= FieldOffset;
-
-    // Maybe we need to work from the other end.
-    if (Asm->getTargetData().isLittleEndian())
-      Offset = FieldSize - (Offset + Size);
-    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
-
-    // Here WD_AT_data_member_location points to the anonymous
-    // field that includes this bit field.
-    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
-
-  } else
-    // This is not a bitfield.
-    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
-
-  if (DT.getTag() == dwarf::DW_TAG_inheritance
-      && DT.isVirtual()) {
-
-    // For C++, virtual base classes are not at fixed offset. Use following
-    // expression to extract appropriate offset from vtable.
-    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
-
-    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
-
-    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
-             VBaseLocationDie);
-  } else
-    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
-
-  if (DT.isProtected())
-    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-            dwarf::DW_ACCESS_protected);
-  else if (DT.isPrivate())
-    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-            dwarf::DW_ACCESS_private);
-  // Otherwise C++ member and base classes are considered public.
-  else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
-    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
-            dwarf::DW_ACCESS_public);
-  if (DT.isVirtual())
-    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
-            dwarf::DW_VIRTUALITY_virtual);
-  return MemberDie;
-}
-
 /// createSubprogramDIE - Create new DIE using SP.
 DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
   CompileUnit *SPCU = getCompileUnit(SP);
@@ -1381,19 +322,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
     return SPDie;
 
   SPDie = new DIE(dwarf::DW_TAG_subprogram);
+  
+  // DW_TAG_inlined_subroutine may refer to this DIE.
+  SPCU->insertDIE(SP, SPDie);
+  
+  // Add to context owner.
+  SPCU->addToContextOwner(SPDie, SP.getContext());
+
+  // Add function template parameters.
+  SPCU->addTemplateParams(*SPDie, SP.getTemplateParams());
+
+  // If this DIE is going to refer declaration info using AT_specification
+  // then there is no need to add other attributes.
+  if (SP.getFunctionDeclaration().isSubprogram())
+    return SPDie;
+
   // Constructors and operators for anonymous aggregates do not have names.
   if (!SP.getName().empty())
-    addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
+    SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, 
+                    SP.getName());
 
   StringRef LinkageName = SP.getLinkageName();
   if (!LinkageName.empty())
-    addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              getRealLinkageName(LinkageName));
+    SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+                    getRealLinkageName(LinkageName));
 
-  addSourceLine(SPDie, SP);
+  SPCU->addSourceLine(SPDie, SP);
 
   if (SP.isPrototyped()) 
-    addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
 
   // Add Return Type.
   DICompositeType SPTy = SP.getType();
@@ -1401,24 +358,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
   unsigned SPTag = SPTy.getTag();
 
   if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
-    addType(SPDie, SPTy);
+    SPCU->addType(SPDie, SPTy);
   else
-    addType(SPDie, DIType(Args.getElement(0)));
+    SPCU->addType(SPDie, DIType(Args.getElement(0)));
 
   unsigned VK = SP.getVirtuality();
   if (VK) {
-    addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
-    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
-    addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
-    addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+    DIEBlock *Block = SPCU->getDIEBlock();
+    SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+    SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
     ContainingTypeMap.insert(std::make_pair(SPDie,
                                             SP.getContainingType()));
   }
 
   if (!SP.isDefinition()) {
-    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
+    SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+    
     // Add arguments. Do not add arguments for subprogram definition. They will
     // be handled while processing variables.
     DICompositeType SPTy = SP.getType();
@@ -1429,32 +386,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
       for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
         DIType ATy = DIType(DIType(Args.getElement(i)));
-        addType(Arg, ATy);
+        SPCU->addType(Arg, ATy);
         if (ATy.isArtificial())
-          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+          SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
         SPDie->addChild(Arg);
       }
   }
 
   if (SP.isArtificial())
-    addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
 
   if (!SP.isLocalToUnit())
-    addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
 
   if (SP.isOptimized())
-    addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
 
   if (unsigned isa = Asm->getISAEncoding()) {
-    addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+    SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
   }
 
-  // DW_TAG_inlined_subroutine may refer to this DIE.
-  SPCU->insertDIE(SP, SPDie);
-
-  // Add to context owner.
-  addToContextOwner(SPDie, SP.getContext());
-
   return SPDie;
 }
 
@@ -1509,51 +460,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
   assert(SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
 
-  // There is not any need to generate specification DIE for a function
-  // defined at compile unit level. If a function is defined inside another
-  // function then gdb prefers the definition at top level and but does not
-  // expect specification DIE in parent function. So avoid creating
-  // specification DIE for a function defined inside a function.
-  if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
-      !SP.getContext().isFile() &&
-      !isSubprogramContext(SP.getContext())) {
-    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
-    // Add arguments.
-    DICompositeType SPTy = SP.getType();
-    DIArray Args = SPTy.getTypeArray();
-    unsigned SPTag = SPTy.getTag();
-    if (SPTag == dwarf::DW_TAG_subroutine_type)
-      for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
-        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        DIType ATy = DIType(DIType(Args.getElement(i)));
-        addType(Arg, ATy);
-        if (ATy.isArtificial())
-          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
-        SPDie->addChild(Arg);
-      }
-    DIE *SPDeclDie = SPDie;
-    SPDie = new DIE(dwarf::DW_TAG_subprogram);
-    addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
-                SPDeclDie);
-    SPCU->addDie(SPDie);
+  DISubprogram SPDecl = SP.getFunctionDeclaration();
+  if (SPDecl.isSubprogram())
+    // Refer function declaration directly.
+    SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+                      createSubprogramDIE(SPDecl));
+  else {
+    // There is not any need to generate specification DIE for a function
+    // defined at compile unit level. If a function is defined inside another
+    // function then gdb prefers the definition at top level and but does not
+    // expect specification DIE in parent function. So avoid creating
+    // specification DIE for a function defined inside a function.
+    if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+        !SP.getContext().isFile() &&
+        !isSubprogramContext(SP.getContext())) {
+      SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      
+      // Add arguments.
+      DICompositeType SPTy = SP.getType();
+      DIArray Args = SPTy.getTypeArray();
+      unsigned SPTag = SPTy.getTag();
+      if (SPTag == dwarf::DW_TAG_subroutine_type)
+        for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+          DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+          DIType ATy = DIType(DIType(Args.getElement(i)));
+          SPCU->addType(Arg, ATy);
+          if (ATy.isArtificial())
+            SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+          SPDie->addChild(Arg);
+        }
+      DIE *SPDeclDie = SPDie;
+      SPDie = new DIE(dwarf::DW_TAG_subprogram);
+      SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+                        SPDeclDie);
+      SPCU->addDie(SPDie);
+    }
   }
-
   // Pick up abstract subprogram DIE.
   if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
     SPDie = new DIE(dwarf::DW_TAG_subprogram);
-    addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
-                dwarf::DW_FORM_ref4, AbsSPDIE);
+    SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+                      dwarf::DW_FORM_ref4, AbsSPDIE);
     SPCU->addDie(SPDie);
   }
 
-  addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-           Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
-  addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-           Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+  SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                 Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+  SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+                 Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   MachineLocation Location(RI->getFrameRegister(*Asm->MF));
-  addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+  SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
 
   return SPDie;
 }
@@ -1570,13 +527,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
   if (Ranges.empty())
     return 0;
 
+  CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode());
   SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
   if (Ranges.size() > 1) {
     // .debug_range section has not been laid out yet. Emit offset in
     // .debug_range as a uint, size 4, for now. emitDIE will handle
     // DW_AT_ranges appropriately.
-    addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
-            DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
+    TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+                   DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
     for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
       DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -1595,8 +553,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
   assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
   assert(End->isDefined() && "Invalid end label for an inlined scope!");
 
-  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
-  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
 
   return ScopeDIE;
 }
@@ -1636,11 +594,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   CompileUnit *TheCU = getCompileUnit(InlinedSP);
   DIE *OriginDIE = TheCU->getDIE(InlinedSP);
   assert(OriginDIE && "Unable to find Origin DIE!");
-  addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
-              dwarf::DW_FORM_ref4, OriginDIE);
+  TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+                     dwarf::DW_FORM_ref4, OriginDIE);
 
-  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
-  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
+  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
+  TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
 
   InlinedSubprogramDIEs.insert(OriginDIE);
 
@@ -1656,8 +614,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
     I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
 
   DILocation DL(Scope->getInlinedAt());
-  addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
-  addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
 
   return ScopeDIE;
 }
@@ -1686,7 +644,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 
   // Define variable debug information entry.
   DIE *VariableDie = new DIE(Tag);
-
+  CompileUnit *VariableCU = getCompileUnit(DV->getVariable());
   DIE *AbsDIE = NULL;
   DenseMap<const DbgVariable *, const DbgVariable *>::iterator
     V2AVI = VarToAbstractVarMap.find(DV);
@@ -1694,20 +652,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
     AbsDIE = V2AVI->second->getDIE();
 
   if (AbsDIE)
-    addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
-                dwarf::DW_FORM_ref4, AbsDIE);
+    VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+                       dwarf::DW_FORM_ref4, AbsDIE);
   else {
-    addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-    addSourceLine(VariableDie, DV->getVariable());
+    VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+                          Name);
+    VariableCU->addSourceLine(VariableDie, DV->getVariable());
 
     // Add variable type.
-    addType(VariableDie, DV->getType());
+    VariableCU->addType(VariableDie, DV->getType());
   }
 
   if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
-    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+    VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, 
+                        dwarf::DW_FORM_flag, 1);
   else if (DIVariable(DV->getVariable()).isArtificial())
-    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+    VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, 
+                        dwarf::DW_FORM_flag, 1);
 
   if (Scope->isAbstractScope()) {
     DV->setDIE(VariableDie);
@@ -1718,7 +679,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 
   unsigned Offset = DV->getDotDebugLocOffset();
   if (Offset != ~0U) {
-    addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+    VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
              Asm->GetTempSymbol("debug_loc", Offset));
     DV->setDIE(VariableDie);
     UseDotDebugLocEntry.insert(VariableDie);
@@ -1738,22 +699,30 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
         const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
         if (DVInsn->getOperand(1).isImm() &&
             TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
-          addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
-          updated = true;
-        } else
-          updated = addRegisterAddress(VariableDie, RegOp);
+          unsigned FrameReg = 0;
+          const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+          int Offset = 
+            TFI->getFrameIndexReference(*Asm->MF, 
+                                        DVInsn->getOperand(1).getImm(), 
+                                        FrameReg);
+          MachineLocation Location(FrameReg, Offset);
+          VariableCU->addVariableAddress(DV, VariableDie, Location);
+          
+        } else if (RegOp.getReg())
+          VariableCU->addVariableAddress(DV, VariableDie, 
+                                         MachineLocation(RegOp.getReg()));
+        updated = true;
       }
       else if (DVInsn->getOperand(0).isImm())
-        updated = addConstantValue(VariableDie, DVInsn->getOperand(0));
+        updated = VariableCU->addConstantValue(VariableDie, 
+                                               DVInsn->getOperand(0));
       else if (DVInsn->getOperand(0).isFPImm())
         updated =
-          addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+          VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
     } else {
-      MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
-      if (Location.getReg()) {
-        addAddress(VariableDie, dwarf::DW_AT_location, Location);
-        updated = true;
-      }
+      VariableCU->addVariableAddress(DV, VariableDie, 
+                                     Asm->getDebugValueLocation(DVInsn));
+      updated = true;
     }
     if (!updated) {
       // If variableDie is not updated then DBG_VALUE instruction does not
@@ -1767,15 +736,21 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 
   // .. else use frame index, if available.
   int FI = 0;
-  if (findVariableFrameIndex(DV, &FI))
-    addVariableAddress(DV, VariableDie, FI);
-  
+  if (findVariableFrameIndex(DV, &FI)) {
+    unsigned FrameReg = 0;
+    const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+    int Offset = 
+      TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+    MachineLocation Location(FrameReg, Offset);
+    VariableCU->addVariableAddress(DV, VariableDie, Location);
+  }
+
   DV->setDIE(VariableDie);
   return VariableDie;
 
 }
 
-void DwarfDebug::addPubTypes(DISubprogram SP) {
+void CompileUnit::addPubTypes(DISubprogram SP) {
   DICompositeType SPTy = SP.getType();
   unsigned SPTag = SPTy.getTag();
   if (SPTag != dwarf::DW_TAG_subroutine_type)
@@ -1789,9 +764,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
     DICompositeType CATy = getDICompositeType(ATy);
     if (DIDescriptor(CATy).Verify() && !CATy.getName().empty()
         && !CATy.isForwardDecl()) {
-      CompileUnit *TheCU = getCompileUnit(CATy);
-      if (DIEEntry *Entry = TheCU->getDIEEntry(CATy))
-        TheCU->addGlobalType(CATy.getName(), Entry->getEntry());
+      if (DIEEntry *Entry = getDIEEntry(CATy))
+        addGlobalType(CATy.getName(), Entry->getEntry());
     }
   }
 }
@@ -1802,6 +776,14 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
     return NULL;
 
   SmallVector <DIE *, 8> Children;
+
+  // Collect arguments for current function.
+  if (Scope == CurrentFnDbgScope)
+    for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+      if (DbgVariable *ArgDV = CurrentFnArguments[i])
+        if (DIE *Arg = constructVariableDIE(ArgDV, Scope))
+          Children.push_back(Arg);
+
   // Collect lexical scope childrens first.
   const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i)
@@ -1841,7 +823,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
     ScopeDIE->addChild(*I);
 
   if (DS.isSubprogram())
-    addPubTypes(DISubprogram(DS));
+    getCompileUnit(DS)->addPubTypes(DISubprogram(DS));
 
  return ScopeDIE;
 }
@@ -1851,10 +833,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
 /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
 /// maps as well.
 
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, 
+                                         StringRef DirName) {
   // If FE did not provide a file name, then assume stdin.
   if (FileName.empty())
-    return GetOrCreateSourceID("<stdin>");
+    return GetOrCreateSourceID("<stdin>", StringRef());
+
+  // MCStream expects full path name as filename.
+  if (!DirName.empty() && !FileName.startswith("/")) {
+    std::string FullPathName(DirName.data());
+    if (!DirName.endswith("/"))
+      FullPathName += "/";
+    FullPathName += FileName.data();
+    // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+    return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+  }
 
   StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
   if (Entry.getValue())
@@ -1864,19 +857,18 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
   Entry.setValue(SrcId);
 
   // Print out a .file directive to specify files for .loc directives.
-  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName);
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
 
   return SrcId;
 }
 
 /// getOrCreateNameSpace - Create a DIE for DINameSpace.
-DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
-  CompileUnit *TheCU = getCompileUnit(NS);
-  DIE *NDie = TheCU->getDIE(NS);
+DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+  DIE *NDie = getDIE(NS);
   if (NDie)
     return NDie;
   NDie = new DIE(dwarf::DW_TAG_namespace);
-  TheCU->insertDIE(NS, NDie);
+  insertDIE(NS, NDie);
   if (!NS.getName().empty())
     addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
   addSourceLine(NDie, NS);
@@ -1890,40 +882,40 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
-  unsigned ID = GetOrCreateSourceID(FN);
+  unsigned ID = GetOrCreateSourceID(FN, Dir);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
-            DIUnit.getProducer());
-  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-          DIUnit.getLanguage());
-  addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+  CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
+  NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+                   DIUnit.getProducer());
+  NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                 DIUnit.getLanguage());
+  NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
   // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
   // simplifies debug range entries.
-  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+  NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
   if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
-    addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
-             Asm->GetTempSymbol("section_line"));
+    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+                    Asm->GetTempSymbol("section_line"));
   else
-    addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
 
   if (!Dir.empty())
-    addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+    NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
   if (DIUnit.isOptimized())
-    addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+    NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
 
   StringRef Flags = DIUnit.getFlags();
   if (!Flags.empty())
-    addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
-
+    NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+  
   unsigned RVer = DIUnit.getRunTimeVersion();
   if (RVer)
-    addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+    NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
             dwarf::DW_FORM_data1, RVer);
 
-  CompileUnit *NewCU = new CompileUnit(ID, Die);
   if (!FirstCU)
     FirstCU = NewCU;
   CUMap.insert(std::make_pair(N, NewCU));
@@ -2019,14 +1011,15 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
   bool isGlobalVariable = GV.getGlobal() != NULL;
 
   // Add name.
-  addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
-            GV.getDisplayName());
+  TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+                   GV.getDisplayName());
   StringRef LinkageName = GV.getLinkageName();
   if (!LinkageName.empty() && isGlobalVariable)
-    addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              getRealLinkageName(LinkageName));
+    TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, 
+                     dwarf::DW_FORM_string,
+                     getRealLinkageName(LinkageName));
   // Add type.
-  addType(VariableDIE, GTy);
+  TheCU->addType(VariableDIE, GTy);
   if (GTy.isCompositeType() && !GTy.getName().empty()
       && !GTy.isForwardDecl()) {
     DIEEntry *Entry = TheCU->getDIEEntry(GTy);
@@ -2035,22 +1028,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
   }
   // Add scoping info.
   if (!GV.isLocalToUnit()) {
-    addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
     // Expose as global. 
     TheCU->addGlobal(GV.getName(), VariableDIE);
   }
   // Add line number info.
-  addSourceLine(VariableDIE, GV);
+  TheCU->addSourceLine(VariableDIE, GV);
   // Add to map.
   TheCU->insertDIE(N, VariableDIE);
   // Add to context owner.
   DIDescriptor GVContext = GV.getContext();
-  addToContextOwner(VariableDIE, GVContext);
+  TheCU->addToContextOwner(VariableDIE, GVContext);
   // Add location.
   if (isGlobalVariable) {
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addLabel(Block, 0, dwarf::DW_FORM_udata,
+    TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
              Asm->Mang->getSymbol(GV.getGlobal()));
     // Do not create specification DIE if context is either compile unit
     // or a subprogram.
@@ -2058,28 +1051,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
         !GVContext.isFile() && !isSubprogramContext(GVContext)) {
       // Create specification DIE.
       DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
-      addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+      TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
                   dwarf::DW_FORM_ref4, VariableDIE);
-      addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
-      addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+      TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
       TheCU->addDie(VariableSpecDIE);
     } else {
-      addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+      TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
     } 
   } else if (ConstantInt *CI = 
              dyn_cast_or_null<ConstantInt>(GV.getConstant()))
-    addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+    TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
   else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
     // GV is a merged global.
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-    addLabel(Block, 0, dwarf::DW_FORM_udata,
-             Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+    TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata,
+                    Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
     ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
-    addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
-    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
-    addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+    TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+    TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
   }
 
   return;
@@ -2105,7 +1098,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
   TheCU->insertDIE(N, SubprogramDie);
 
   // Add to context owner.
-  addToContextOwner(SubprogramDie, SP.getContext());
+  TheCU->addToContextOwner(SubprogramDie, SP.getContext());
 
   // Expose as global.
   TheCU->addGlobal(SP.getName(), SubprogramDie);
@@ -2160,12 +1153,16 @@ void DwarfDebug::beginModule(Module *M) {
 
   //getOrCreateTypeDIE
   if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
-      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIType Ty(NMD->getOperand(i));
+      getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+    }
 
   if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
-      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIType Ty(NMD->getOperand(i));
+      getCompileUnit(Ty)->getOrCreateTypeDIE(Ty);
+    }
 
   // Prime section data.
   SectionMap.insert(Asm->getObjFileLowering().getTextSection());
@@ -2216,7 +1213,7 @@ void DwarfDebug::endModule() {
   for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
          AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
     DIE *ISP = *AI;
-    addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+    FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
 
   for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
@@ -2226,7 +1223,8 @@ void DwarfDebug::endModule() {
     if (!N) continue;
     DIE *NDie = getCompileUnit(N)->getDIE(N);
     if (!NDie) continue;
-    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, 
+                                   dwarf::DW_FORM_ref4, NDie);
   }
 
   // Standard sections final addresses.
@@ -2309,6 +1307,30 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
   return AbsDbgVariable;
 }
 
+/// addCurrentFnArgument - If Var is an current function argument that add
+/// it in CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+                                      DbgVariable *Var, DbgScope *Scope) {
+  if (Scope != CurrentFnDbgScope) 
+    return false;
+  DIVariable DV = Var->getVariable();
+  if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+    return false;
+  unsigned ArgNo = DV.getArgNumber();
+  if (ArgNo == 0) 
+    return false;
+
+  size_t Size = CurrentFnArguments.size();
+  if (Size == 0)
+    CurrentFnArguments.resize(MF->getFunction()->arg_size());
+  // llvm::Function argument size is not good indicator of how many
+  // arguments does the function have at source level.
+  if (ArgNo > Size)
+    CurrentFnArguments.resize(ArgNo * 2);
+  CurrentFnArguments[ArgNo - 1] = Var;
+  return true;
+}
+
 /// collectVariableInfoFromMMITable - Collect variable information from
 /// side table maintained by MMI.
 void
@@ -2337,7 +1359,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
     DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
     DbgVariable *RegVar = new DbgVariable(DV);
     recordVariableFrameIndex(RegVar, VP.first);
-    Scope->addVariable(RegVar);
+    if (!addCurrentFnArgument(MF, RegVar, Scope))
+      Scope->addVariable(RegVar);
     if (AbsDbgVariable) {
       recordVariableFrameIndex(AbsDbgVariable, VP.first);
       VarToAbstractVarMap[RegVar] = AbsDbgVariable;
@@ -2349,9 +1372,9 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
 /// DBG_VALUE instruction, is in a defined reg.
 static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
   assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
-  if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
-    return true;
-  return false;
+  return MI->getNumOperands() == 3 &&
+         MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+         MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
 }
 
 /// collectVariableInfo - Populate DbgScope entries with variables' info.
@@ -2362,41 +1385,21 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
   /// collection info from MMI table.
   collectVariableInfoFromMMITable(MF, Processed);
 
-  SmallVector<const MachineInstr *, 8> DbgValues;
-  // Collect variable information from DBG_VALUE machine instructions;
-  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
-       I != E; ++I)
-    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
-         II != IE; ++II) {
-      const MachineInstr *MInsn = II;
-      if (!MInsn->isDebugValue())
-        continue;
-      DbgValues.push_back(MInsn);
-    }
-
-  // This is a collection of DBV_VALUE instructions describing same variable.
-  SmallVector<const MachineInstr *, 4> MultipleValues;
-  for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
-        E = DbgValues.end(); I != E; ++I) {
-    const MachineInstr *MInsn = *I;
-    MultipleValues.clear();
-    if (isDbgValueInDefinedReg(MInsn))
-      MultipleValues.push_back(MInsn);
-    DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
-    if (Processed.count(DV) != 0)
+  for (SmallVectorImpl<const MDNode*>::const_iterator
+         UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+         ++UVI) {
+    const MDNode *Var = *UVI;
+    if (Processed.count(Var))
       continue;
 
-    const MachineInstr *PrevMI = MInsn;
-    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
-           ME = DbgValues.end(); MI != ME; ++MI) {
-      const MDNode *Var =
-        (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
-      if (Var == DV && 
-          !PrevMI->isIdenticalTo(*MI))
-        MultipleValues.push_back(*MI);
-      PrevMI = *MI;
-    }
+    // History contains relevant DBG_VALUE instructions for Var and instructions
+    // clobbering it.
+    SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+    if (History.empty())
+      continue;
+    const MachineInstr *MInsn = History.front();
 
+    DIVariable DV(Var);
     DbgScope *Scope = NULL;
     if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
         DISubprogram(DV.getContext()).describes(MF->getFunction()))
@@ -2408,32 +1411,29 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       continue;
 
     Processed.insert(DV);
+    assert(MInsn->isDebugValue() && "History must begin with debug value");
     DbgVariable *RegVar = new DbgVariable(DV);
-    Scope->addVariable(RegVar);
+    if (!addCurrentFnArgument(MF, RegVar, Scope))
+      Scope->addVariable(RegVar);
     if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
       DbgVariableToDbgInstMap[AbsVar] = MInsn;
       VarToAbstractVarMap[RegVar] = AbsVar;
     }
-    if (MultipleValues.size() <= 1) {
+
+    // Simple ranges that are fully coalesced.
+    if (History.size() <= 1 || (History.size() == 2 &&
+                                MInsn->isIdenticalTo(History.back()))) {
       DbgVariableToDbgInstMap[RegVar] = MInsn;
       continue;
     }
 
     // handle multiple DBG_VALUE instructions describing one variable.
-    if (DotDebugLocEntries.empty())
-      RegVar->setDotDebugLocOffset(0);
-    else
-      RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
-    const MachineInstr *Begin = NULL;
-    const MachineInstr *End = NULL;
-    for (SmallVector<const MachineInstr *, 4>::iterator
-           MVI = MultipleValues.begin(), MVE = MultipleValues.end();
-         MVI != MVE; ++MVI) {
-      if (!Begin) {
-        Begin = *MVI;
-        continue;
-      }
-      End = *MVI;
+    RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+
+    for (SmallVectorImpl<const MachineInstr*>::const_iterator
+           HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
+      const MachineInstr *Begin = *HI;
+      assert(Begin->isDebugValue() && "Invalid History entry");
       MachineLocation MLoc;
       if (Begin->getNumOperands() == 3) {
         if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
@@ -2441,25 +1441,32 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       } else
         MLoc = Asm->getDebugValueLocation(Begin);
 
+      // FIXME: emitDebugLoc only understands registers.
+      if (!MLoc.getReg())
+        continue;
+
+      // Compute the range for a register location.
       const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
-      const MCSymbol *SLabel = getLabelBeforeInsn(End);
-      if (MLoc.getReg())
-        DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
-
-      Begin = End;
-      if (MVI + 1 == MVE) {
-        // If End is the last instruction then its value is valid
-        // until the end of the funtion.
-        MachineLocation EMLoc;
-        if (End->getNumOperands() == 3) {
-          if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm())
-          EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
-        } else
-          EMLoc = Asm->getDebugValueLocation(End);
-        if (EMLoc.getReg()) 
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
+      const MCSymbol *SLabel = 0;
+
+      if (HI + 1 == HE)
+        // If Begin is the last instruction in History then its value is valid
+        // until the end of the function.
+        SLabel = FunctionEndSym;
+      else {
+        const MachineInstr *End = HI[1];
+        if (End->isDebugValue())
+          SLabel = getLabelBeforeInsn(End);
+        else {
+          // End is a normal instruction clobbering the range.
+          SLabel = getLabelAfterInsn(End);
+          assert(SLabel && "Forgot label after clobber instruction");
+          ++HI;
+        }
       }
+
+      // The value is valid until the next DBG_VALUE or clobber.
+      DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
     }
     DotDebugLocEntries.push_back(DotDebugLocEntry());
   }
@@ -2480,66 +1487,74 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
 
 /// getLabelBeforeInsn - Return Label preceding the instruction.
 const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
-  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
-    LabelsBeforeInsn.find(MI);
-  if (I == LabelsBeforeInsn.end())
-    // FunctionBeginSym always preceeds all the instruction in current function.
-    return FunctionBeginSym;
-  return I->second;
+  MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+  assert(Label && "Didn't insert label before instruction");
+  return Label;
 }
 
 /// getLabelAfterInsn - Return Label immediately following the instruction.
 const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
-  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
-    LabelsAfterInsn.find(MI);
-  if (I == LabelsAfterInsn.end())
-    return NULL;
-  return I->second;
+  return LabelsAfterInsn.lookup(MI);
 }
 
 /// beginInstruction - Process beginning of an instruction.
 void DwarfDebug::beginInstruction(const MachineInstr *MI) {
-  if (InsnNeedsLabel.count(MI) == 0) {
-    LabelsBeforeInsn[MI] = PrevLabel;
-    return;
+  // Check if source location changes, but ignore DBG_VALUE locations.
+  if (!MI->isDebugValue()) {
+    DebugLoc DL = MI->getDebugLoc();
+    if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+      PrevInstLoc = DL;
+      if (!DL.isUnknown()) {
+        const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+        recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+      } else
+        recordSourceLine(0, 0, 0);
+    }
   }
 
-  // Check location.
-  DebugLoc DL = MI->getDebugLoc();
-  if (!DL.isUnknown()) {
-    const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
-    PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
-    PrevInstLoc = DL;
-    LabelsBeforeInsn[MI] = PrevLabel;
-    return;
-  }
+  // Insert labels where requested.
+  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+    LabelsBeforeInsn.find(MI);
 
-  // If location is unknown then use temp label for this DBG_VALUE
-  // instruction.
-  if (MI->isDebugValue()) {
-    PrevLabel = MMI->getContext().CreateTempSymbol();
-    Asm->OutStreamer.EmitLabel(PrevLabel);
-    LabelsBeforeInsn[MI] = PrevLabel;
+  // No label needed.
+  if (I == LabelsBeforeInsn.end())
     return;
-  }
 
-  if (UnknownLocations) {
-    PrevLabel = recordSourceLine(0, 0, 0);
-    LabelsBeforeInsn[MI] = PrevLabel;
+  // Label already assigned.
+  if (I->second)
     return;
-  }
 
-  assert (0 && "Instruction is not processed!");
+  if (!PrevLabel) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+  }
+  I->second = PrevLabel;
 }
 
 /// endInstruction - Process end of an instruction.
 void DwarfDebug::endInstruction(const MachineInstr *MI) {
-  if (InsnsEndScopeSet.count(MI) != 0) {
-    // Emit a label if this instruction ends a scope.
-    MCSymbol *Label = MMI->getContext().CreateTempSymbol();
-    Asm->OutStreamer.EmitLabel(Label);
-    LabelsAfterInsn[MI] = Label;
+  // Don't create a new label after DBG_VALUE instructions.
+  // They don't generate code.
+  if (!MI->isDebugValue())
+    PrevLabel = 0;
+
+  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+    LabelsAfterInsn.find(MI);
+
+  // No label needed.
+  if (I == LabelsAfterInsn.end())
+    return;
+
+  // Label already assigned.
+  if (I->second)
+    return;
+
+  // We need a label after this instruction.
+  if (!PrevLabel) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
   }
+  I->second = PrevLabel;
 }
 
 /// getOrCreateDbgScope - Create DbgScope for the scope.
@@ -2799,7 +1814,8 @@ void DwarfDebug::identifyScopeMarkers() {
            RE = Ranges.end(); RI != RE; ++RI) {
       assert(RI->first && "DbgRange does not have first instruction!");
       assert(RI->second && "DbgRange does not have second instruction!");
-      InsnsEndScopeSet.insert(RI->second);
+      requestLabelBeforeInsn(RI->first);
+      requestLabelAfterInsn(RI->second);
     }
   }
 }
@@ -2877,45 +1893,145 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 
   recordSourceLine(Line, Col, TheScope);
 
+  assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
   /// ProcessedArgs - Collection of arguments already processed.
   SmallPtrSet<const MDNode *, 8> ProcessedArgs;
 
-  DebugLoc PrevLoc;
+  const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+
+  /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+  std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
-       I != E; ++I)
+       I != E; ++I) {
+    bool AtBlockEntry = true;
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MI = II;
-      DebugLoc DL = MI->getDebugLoc();
+
       if (MI->isDebugValue()) {
         assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
-        DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
-        if (!DV.Verify()) continue;
-        // If DBG_VALUE is for a local variable then it needs a label.
-        if (DV.getTag() != dwarf::DW_TAG_arg_variable)
-          InsnNeedsLabel.insert(MI);
-        // DBG_VALUE for inlined functions argument needs a label.
-        else if (!DISubprogram(getDISubprogram(DV.getContext())).
-                 describes(MF->getFunction()))
-          InsnNeedsLabel.insert(MI);
-        // DBG_VALUE indicating argument location change needs a label.
-        else if (!ProcessedArgs.insert(DV))
-          InsnNeedsLabel.insert(MI);
+
+        // Keep track of user variables.
+        const MDNode *Var =
+          MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+        // Variable is in a register, we need to check for clobbers.
+        if (isDbgValueInDefinedReg(MI))
+          LiveUserVar[MI->getOperand(0).getReg()] = Var;
+
+        // Check the history of this variable.
+        SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+        if (History.empty()) {
+          UserVariables.push_back(Var);
+          // The first mention of a function argument gets the FunctionBeginSym
+          // label, so arguments are visible when breaking at function entry.
+          DIVariable DV(Var);
+          if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+              DISubprogram(getDISubprogram(DV.getContext()))
+                .describes(MF->getFunction()))
+            LabelsBeforeInsn[MI] = FunctionBeginSym;
+        } else {
+          // We have seen this variable before. Try to coalesce DBG_VALUEs.
+          const MachineInstr *Prev = History.back();
+          if (Prev->isDebugValue()) {
+            // Coalesce identical entries at the end of History.
+            if (History.size() >= 2 &&
+                Prev->isIdenticalTo(History[History.size() - 2]))
+              History.pop_back();
+
+            // Terminate old register assignments that don't reach MI;
+            MachineFunction::const_iterator PrevMBB = Prev->getParent();
+            if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+                isDbgValueInDefinedReg(Prev)) {
+              // Previous register assignment needs to terminate at the end of
+              // its basic block.
+              MachineBasicBlock::const_iterator LastMI =
+                PrevMBB->getLastNonDebugInstr();
+              if (LastMI == PrevMBB->end())
+                // Drop DBG_VALUE for empty range.
+                History.pop_back();
+              else {
+                // Terminate after LastMI.
+                History.push_back(LastMI);
+              }
+            }
+          }
+        }
+        History.push_back(MI);
       } else {
-        // If location is unknown then instruction needs a location only if
-        // UnknownLocations flag is set.
-        if (DL.isUnknown()) {
-          if (UnknownLocations && !PrevLoc.isUnknown())
-            InsnNeedsLabel.insert(MI);
-        } else if (DL != PrevLoc)
-          // Otherwise, instruction needs a location only if it is new location.
-          InsnNeedsLabel.insert(MI);
+        // Not a DBG_VALUE instruction.
+        if (!MI->isLabel())
+          AtBlockEntry = false;
+
+        // Check if the instruction clobbers any registers with debug vars.
+        for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+               MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+          if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+            continue;
+          for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+               unsigned Reg = *AI; ++AI) {
+            const MDNode *Var = LiveUserVar[Reg];
+            if (!Var)
+              continue;
+            // Reg is now clobbered.
+            LiveUserVar[Reg] = 0;
+
+            // Was MD last defined by a DBG_VALUE referring to Reg?
+            DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+            if (HistI == DbgValues.end())
+              continue;
+            SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+            if (History.empty())
+              continue;
+            const MachineInstr *Prev = History.back();
+            // Sanity-check: Register assignments are terminated at the end of
+            // their block.
+            if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+              continue;
+            // Is the variable still in Reg?
+            if (!isDbgValueInDefinedReg(Prev) ||
+                Prev->getOperand(0).getReg() != Reg)
+              continue;
+            // Var is clobbered. Make sure the next instruction gets a label.
+            History.push_back(MI);
+          }
+        }
       }
+    }
+  }
+
+  for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+       I != E; ++I) {
+    SmallVectorImpl<const MachineInstr*> &History = I->second;
+    if (History.empty())
+      continue;
 
-      if (!DL.isUnknown() || UnknownLocations)
-        PrevLoc = DL;
+    // Make sure the final register assignments are terminated.
+    const MachineInstr *Prev = History.back();
+    if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+      const MachineBasicBlock *PrevMBB = Prev->getParent();
+      MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr();
+      if (LastMI == PrevMBB->end())
+        // Drop DBG_VALUE for empty range.
+        History.pop_back();
+      else {
+        // Terminate after LastMI.
+        History.push_back(LastMI);
+      }
     }
+    // Request labels for the full history.
+    for (unsigned i = 0, e = History.size(); i != e; ++i) {
+      const MachineInstr *MI = History[i];
+      if (MI->isDebugValue())
+        requestLabelBeforeInsn(MI);
+      else
+        requestLabelAfterInsn(MI);
+    }
+  }
 
+  PrevInstLoc = DebugLoc();
   PrevLabel = FunctionBeginSym;
 }
 
@@ -2963,8 +2079,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
     DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
 
     if (!DisableFramePointerElim(*MF))
-      addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
-              dwarf::DW_FORM_flag, 1);
+      getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, 
+                                                                 dwarf::DW_AT_APPLE_omit_frame_ptr,
+                                                                 dwarf::DW_FORM_flag, 1);
 
 
     DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
@@ -2973,12 +2090,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
 
   // Clear debug info
   CurrentFnDbgScope = NULL;
-  InsnNeedsLabel.clear();
+  DeleteContainerPointers(CurrentFnArguments);
   DbgVariableToFrameIndexMap.clear();
   VarToAbstractVarMap.clear();
   DbgVariableToDbgInstMap.clear();
   DeleteContainerSeconds(DbgScopeMap);
-  InsnsEndScopeSet.clear();
+  UserVariables.clear();
+  DbgValues.clear();
   ConcreteScopes.clear();
   DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
@@ -3029,10 +2147,9 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
 /// recordSourceLine - Register a source line with debug info. Returns the
 /// unique label that was emitted and which provides correspondence to
 /// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
-                                       const MDNode *S) {
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){
   StringRef Fn;
-
+  StringRef Dir;
   unsigned Src = 1;
   if (S) {
     DIDescriptor Scope(S);
@@ -3040,27 +2157,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
     if (Scope.isCompileUnit()) {
       DICompileUnit CU(S);
       Fn = CU.getFilename();
+      Dir = CU.getDirectory();
     } else if (Scope.isFile()) {
       DIFile F(S);
       Fn = F.getFilename();
+      Dir = F.getDirectory();
     } else if (Scope.isSubprogram()) {
       DISubprogram SP(S);
       Fn = SP.getFilename();
+      Dir = SP.getDirectory();
     } else if (Scope.isLexicalBlock()) {
       DILexicalBlock DB(S);
       Fn = DB.getFilename();
+      Dir = DB.getDirectory();
     } else
       assert(0 && "Unexpected scope info");
 
-    Src = GetOrCreateSourceID(Fn);
+    Src = GetOrCreateSourceID(Fn, Dir);
   }
-
   Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT,
-                                         0, 0);
-
-  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
-  Asm->OutStreamer.EmitLabel(Label);
-  return Label;
+                                         0, 0, Fn);
 }
 
 //===----------------------------------------------------------------------===//
@@ -3118,17 +2234,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
 /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
 ///
 void DwarfDebug::computeSizeAndOffsets() {
-  unsigned PrevOffset = 0;
   for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
          E = CUMap.end(); I != E; ++I) {
     // Compute size of compile unit header.
-    static unsigned Offset = PrevOffset +
+    unsigned Offset = 
       sizeof(int32_t) + // Length of Compilation Unit Info
       sizeof(int16_t) + // DWARF version number
       sizeof(int32_t) + // Offset Into Abbrev. Section
       sizeof(int8_t);   // Pointer Size (in bytes)
     computeSizeAndOffset(I->second->getCUDie(), Offset, true);
-    PrevOffset = Offset;
   }
 }
 
@@ -3289,8 +2403,7 @@ void DwarfDebug::emitDebugInfo() {
     unsigned ContentSize = Die->getSize() +
       sizeof(int16_t) + // DWARF version number
       sizeof(int32_t) + // Offset Into Abbrev. Section
-      sizeof(int8_t) +  // Pointer Size (in bytes)
-      sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+      sizeof(int8_t);   // Pointer Size (in bytes)
 
     Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
     Asm->EmitInt32(ContentSize);
@@ -3303,12 +2416,6 @@ void DwarfDebug::emitDebugInfo() {
     Asm->EmitInt8(Asm->getTargetData().getPointerSize());
 
     emitDIE(Die);
-    // FIXME - extra padding for gdb bug.
-    Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
-    Asm->EmitInt8(0);
-    Asm->EmitInt8(0);
-    Asm->EmitInt8(0);
-    Asm->EmitInt8(0);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
   }
 }
@@ -3614,32 +2721,38 @@ void DwarfDebug::emitDebugLoc() {
     } else {
       Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
       Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
-      const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-      unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
-      if (int Offset =  Entry.Loc.getOffset()) {
-        // If the value is at a certain offset from frame register then
-        // use DW_OP_fbreg.
-        unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1;
+      DIVariable DV(Entry.Variable);
+      if (DV.hasComplexAddress()) {
+        unsigned N = DV.getNumAddrElements();
+        unsigned i = 0;
         Asm->OutStreamer.AddComment("Loc expr size");
-        Asm->EmitInt16(1 + OffsetSize);
-        Asm->OutStreamer.AddComment(
-          dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
-        Asm->EmitInt8(dwarf::DW_OP_fbreg);
-        Asm->OutStreamer.AddComment("Offset");
-        Asm->EmitSLEB128(Offset);
-      } else {
-        if (Reg < 32) {
-          Asm->OutStreamer.AddComment("Loc expr size");
-          Asm->EmitInt16(1);
-          Asm->OutStreamer.AddComment(
-            dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
-          Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+        if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+          // If first address element is OpPlus then emit
+          // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+          MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+          Asm->EmitInt16(Asm->getDwarfRegOpSize(Loc) + N - 2);
+          Asm->EmitDwarfRegOp(Loc);
+//          Asm->EmitULEB128(DV.getAddrElement(1));
+          i = 2;
         } else {
-          Asm->OutStreamer.AddComment("Loc expr size");
-          Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg));
-          Asm->EmitInt8(dwarf::DW_OP_regx);
-          Asm->EmitULEB128(Reg);
+          Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc) + N);
+          Asm->EmitDwarfRegOp(Entry.Loc);
+        }
+
+        // Emit remaining complex address elements.
+        for (; i < N; ++i) {
+          uint64_t Element = DV.getAddrElement(i);
+          if (Element == DIBuilder::OpPlus) {
+            Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+            Asm->EmitULEB128(DV.getAddrElement(++i));
+          } else if (Element == DIBuilder::OpDeref)
+            Asm->EmitInt8(dwarf::DW_OP_deref);
+          else llvm_unreachable("unknown Opcode found in complex address");
         }
+      } else {
+        Asm->OutStreamer.AddComment("Loc expr size");
+        Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc));
+        Asm->EmitDwarfRegOp(Entry.Loc);
       }
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 7df0510..25f2675 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -16,6 +16,7 @@
 
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -40,21 +41,6 @@ class DIE;
 class DIEBlock;
 class DIEEntry;
 
-class DIEnumerator;
-class DIDescriptor;
-class DIVariable;
-class DIGlobal;
-class DIGlobalVariable;
-class DISubprogram;
-class DIBasicType;
-class DIDerivedType;
-class DIType;
-class DINameSpace;
-class DISubrange;
-class DICompositeType;
-class DITemplateTypeParameter;
-class DITemplateValueParameter;
-
 //===----------------------------------------------------------------------===//
 /// SrcLineInfo - This class is used to record source line correspondence.
 ///
@@ -80,10 +66,12 @@ typedef struct DotDebugLocEntry {
   const MCSymbol *Begin;
   const MCSymbol *End;
   MachineLocation Loc;
+  const MDNode *Variable;
   bool Merged;
-  DotDebugLocEntry() : Begin(0), End(0), Merged(false) {}
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) 
-    : Begin(B), End(E), Loc(L), Merged(false) {}
+  DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {}
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+                   const MDNode *V) 
+    : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {}
   /// Empty entries are also used as a trigger to emit temp label. Such
   /// labels are referenced is used to find debug_loc offset for a given DIE.
   bool isEmpty() { return Begin == 0 && End == 0; }
@@ -96,6 +84,43 @@ typedef struct DotDebugLocEntry {
   }
 } DotDebugLocEntry;
 
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+  DIVariable Var;                    // Variable Descriptor.
+  DIE *TheDIE;                       // Variable DIE.
+  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
+public:
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+
+  // Accessors.
+  DIVariable getVariable()           const { return Var; }
+  void setDIE(DIE *D)                      { TheDIE = D; }
+  DIE *getDIE()                      const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O)    { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset()    const { return DotDebugLocOffset; }
+  StringRef getName()                const { return Var.getName(); }
+  unsigned getTag()                  const { return Var.getTag(); }
+  bool variableHasComplexAddress()   const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.hasComplexAddress();
+  }
+  bool isBlockByrefVariable()        const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.isBlockByrefVariable();
+  }
+  unsigned getNumAddrElements()      const { 
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.getNumAddrElements();
+  }
+  uint64_t getAddrElement(unsigned i) const {
+    return Var.getAddrElement(i);
+  }
+  DIType getType() const;
+};
+
 class DwarfDebug {
   /// Asm - Target of Dwarf emission.
   AsmPrinter *Asm;
@@ -122,12 +147,6 @@ class DwarfDebug {
   /// id mapped to a unique id.
   StringMap<unsigned> SourceIdMap;
 
-  /// DIEBlocks - A list of all the DIEBlocks in use.
-  std::vector<DIEBlock *> DIEBlocks;
-
-  // DIEValueAllocator - All DIEValues are allocated through this allocator.
-  BumpPtrAllocator DIEValueAllocator;
-
   /// StringPool - A String->Symbol mapping of strings used by indirect
   /// references.
   StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
@@ -139,10 +158,13 @@ class DwarfDebug {
   ///
   UniqueVector<const MCSection*> SectionMap;
 
-  // CurrentFnDbgScope - Top level scope for the current function.
-  //
+  /// CurrentFnDbgScope - Top level scope for the current function.
+  ///
   DbgScope *CurrentFnDbgScope;
   
+  /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+  SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
   /// DbgScopeMap - Tracks the scopes in the current function.  Owns the
   /// contained DbgScope*s.
   ///
@@ -195,10 +217,6 @@ class DwarfDebug {
   /// corresponds to the MDNode mapped with the subprogram DIE.
   DenseMap<DIE *, const MDNode *> ContainingTypeMap;
 
-  typedef SmallVector<DbgScope *, 2> ScopeVector;
-
-  SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
-
   /// InlineInfo - Keep track of inlined functions and their location.  This
   /// information is used to populate debug_inlined section.
   typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
@@ -217,9 +235,16 @@ class DwarfDebug {
   /// instruction.
   DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
 
-  /// insnNeedsLabel - Collection of instructions that need a label to mark
-  /// a debuggging information entity.
-  SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+  /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
+  /// in order of appearance.
+  SmallVector<const MDNode*, 8> UserVariables;
+
+  /// DbgValues - For each user variable, keep a list of DBG_VALUE
+  /// instructions in order. The list can also contain normal instructions that
+  /// clobber the previous DBG_VALUE.
+  typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+    DbgValueHistoryMap;
+  DbgValueHistoryMap DbgValues;
 
   SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
 
@@ -238,6 +263,9 @@ class DwarfDebug {
 
   std::vector<FunctionDebugFrameInfo> DebugFrames;
 
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
   // Section Symbols: these are assembler temporary labels that are emitted at
   // the beginning of each supported dwarf section.  These are used to form
   // section offsets and are created by EmitSectionLabels.
@@ -246,150 +274,12 @@ class DwarfDebug {
   MCSymbol *DwarfDebugLocSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
 
-  DIEInteger *DIEIntegerOne;
 private:
 
-  /// getNumSourceIds - Return the number of unique source ids.
-  unsigned getNumSourceIds() const {
-    return SourceIdMap.size();
-  }
-
   /// assignAbbrevNumber - Define a unique number for the abbreviation.
   ///
   void assignAbbrevNumber(DIEAbbrev &Abbrev);
 
-  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
-  /// information entry.
-  DIEEntry *createDIEEntry(DIE *Entry);
-
-  /// addUInt - Add an unsigned integer attribute data and value.
-  ///
-  void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
-
-  /// addSInt - Add an signed integer attribute data and value.
-  ///
-  void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
-
-  /// addString - Add a string attribute data and value.
-  ///
-  void addString(DIE *Die, unsigned Attribute, unsigned Form,
-                 const StringRef Str);
-
-  /// addLabel - Add a Dwarf label attribute data and value.
-  ///
-  void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
-                const MCSymbol *Label);
-
-  /// addDelta - Add a label delta attribute data and value.
-  ///
-  void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
-                const MCSymbol *Hi, const MCSymbol *Lo);
-
-  /// addDIEEntry - Add a DIE attribute data and value.
-  ///
-  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
-  
-  /// addBlock - Add block data.
-  ///
-  void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
-
-  /// addSourceLine - Add location information to specified debug information
-  /// entry.
-  void addSourceLine(DIE *Die, DIVariable V);
-  void addSourceLine(DIE *Die, DIGlobalVariable G);
-  void addSourceLine(DIE *Die, DISubprogram SP);
-  void addSourceLine(DIE *Die, DIType Ty);
-  void addSourceLine(DIE *Die, DINameSpace NS);
-
-  /// addAddress - Add an address attribute to a die based on the location
-  /// provided.
-  void addAddress(DIE *Die, unsigned Attribute,
-                  const MachineLocation &Location);
-
-  /// addRegisterAddress - Add register location entry in variable DIE.
-  bool addRegisterAddress(DIE *Die, const MachineOperand &MO);
-
-  /// addConstantValue - Add constant value entry in variable DIE.
-  bool addConstantValue(DIE *Die, const MachineOperand &MO);
-  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
-
-  /// addConstantFPValue - Add constant value entry in variable DIE.
-  bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
-
-  /// addComplexAddress - Start with the address based on the location provided,
-  /// and generate the DWARF information necessary to find the actual variable
-  /// (navigating the extra location information encoded in the type) based on
-  /// the starting location.  Add the DWARF information to the die.
-  ///
-  void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
-                         const MachineLocation &Location);
-
-  // FIXME: Should be reformulated in terms of addComplexAddress.
-  /// addBlockByrefAddress - Start with the address based on the location
-  /// provided, and generate the DWARF information necessary to find the
-  /// actual Block variable (navigating the Block struct) based on the
-  /// starting location.  Add the DWARF information to the die.  Obsolete,
-  /// please use addComplexAddress instead.
-  ///
-  void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
-                            const MachineLocation &Location);
-
-  /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
-  /// on provided frame index.
-  void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
-
-  /// addToContextOwner - Add Die into the list of its context owner's children.
-  void addToContextOwner(DIE *Die, DIDescriptor Context);
-
-  /// addType - Add a new type attribute to the specified entity.
-  void addType(DIE *Entity, DIType Ty);
-
- 
-  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
-  DIE *getOrCreateNameSpace(DINameSpace NS);
-
-  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
-  /// given DIType.
-  DIE *getOrCreateTypeDIE(DIType Ty);
-
-  /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
-  /// for the given DITemplateTypeParameter.
-  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
-
-  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
-  /// for the given DITemplateValueParameter.
-  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
-
-  void addPubTypes(DISubprogram SP);
-
-  /// constructTypeDIE - Construct basic type die from DIBasicType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIBasicType BTy);
-
-  /// constructTypeDIE - Construct derived type die from DIDerivedType.
-  void constructTypeDIE(DIE &Buffer,
-                        DIDerivedType DTy);
-
-  /// constructTypeDIE - Construct type DIE from DICompositeType.
-  void constructTypeDIE(DIE &Buffer,
-                        DICompositeType CTy);
-
-  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
-  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void constructArrayTypeDIE(DIE &Buffer, 
-                             DICompositeType *CTy);
-
-  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *constructEnumTypeDIE(DIEnumerator ETy);
-
-  /// createMemberDIE - Create new member DIE.
-  DIE *createMemberDIE(DIDerivedType DT);
-
-  /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(DISubprogram SP);
-
   /// getOrCreateDbgScope - Create DbgScope for the scope.
   DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
 
@@ -504,11 +394,6 @@ private:
   ///  inlining instance.
   void emitDebugInlineInfo();
 
-  /// GetOrCreateSourceID - Look up the source id with the given directory and
-  /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map.
-  unsigned GetOrCreateSourceID(StringRef FullName);
-
   /// constructCompileUnit - Create new CompileUnit for the given 
   /// metadata node with tag DW_TAG_compile_unit.
   void constructCompileUnit(const MDNode *N);
@@ -525,7 +410,7 @@ private:
   /// recordSourceLine - Register a source line with debug info. Returns the
   /// unique label that was emitted and which provides correspondence to
   /// the source line list.
-  MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
   
   /// recordVariableFrameIndex - Record a variable's index.
   void recordVariableFrameIndex(const DbgVariable *V, int Index);
@@ -546,6 +431,11 @@ private:
   /// and collect DbgScopes. Return true, if atleast one scope was found.
   bool extractScopeInformation();
   
+  /// addCurrentFnArgument - If Var is an current function argument that add
+  /// it in CurrentFnArguments list.
+  bool addCurrentFnArgument(const MachineFunction *MF,
+                            DbgVariable *Var, DbgScope *Scope);
+
   /// collectVariableInfo - Populate DbgScope entries with variables' info.
   void collectVariableInfo(const MachineFunction *,
                            SmallPtrSet<const MDNode *, 16> &ProcessedVars);
@@ -554,6 +444,23 @@ private:
   /// side table maintained by MMI.
   void collectVariableInfoFromMMITable(const MachineFunction * MF,
                                        SmallPtrSet<const MDNode *, 16> &P);
+
+  /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+  void requestLabelBeforeInsn(const MachineInstr *MI) {
+    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+  }
+
+  /// getLabelBeforeInsn - Return Label preceding the instruction.
+  const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+  void requestLabelAfterInsn(const MachineInstr *MI) {
+    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+  }
+
+  /// getLabelAfterInsn - Return Label immediately following the instruction.
+  const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -577,17 +484,19 @@ public:
   ///
   void endFunction(const MachineFunction *MF);
 
-  /// getLabelBeforeInsn - Return Label preceding the instruction.
-  const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
-
-  /// getLabelAfterInsn - Return Label immediately following the instruction.
-  const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
-
   /// beginInstruction - Process beginning of an instruction.
   void beginInstruction(const MachineInstr *MI);
 
   /// endInstruction - Prcess end of an instruction.
   void endInstruction(const MachineInstr *MI);
+
+  /// GetOrCreateSourceID - Look up the source id with the given directory and
+  /// source file names. If none currently exists, create a new id and insert it
+  /// in the SourceIds map.
+  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+
+  /// createSubprogramDIE - Create new DIE using SP.
+  DIE *createSubprogramDIE(DISubprogram SP);
 };
 } // End of namespace llvm
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index a172e53..f111641 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -140,17 +140,18 @@ public:
 };
 
 class DwarfCFIException : public DwarfException {
-  /// shouldEmitTable - Per-function flag to indicate if EH tables should
-  /// be emitted.
-  bool shouldEmitTable;
+  /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+  /// should be emitted.
+  bool shouldEmitPersonality;
+
+  /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
+  /// should be emitted.
+  bool shouldEmitLSDA;
 
   /// shouldEmitMoves - Per-function flag to indicate if frame moves info
   /// should be emitted.
   bool shouldEmitMoves;
 
-  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
-  /// should be emitted.
-  bool shouldEmitTableModule;
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -237,6 +238,38 @@ public:
   virtual void EndFunction();
 };
 
+
+class ARMException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  ARMException(AsmPrinter *A);
+  virtual ~ARMException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
 } // End of namespace llvm
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
index 7519011..b50d8bd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
@@ -92,7 +92,7 @@ void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index)
   // personality function reference:
 
   unsigned LSDAEncoding = TLOF.getLSDAEncoding();
-  unsigned FDEEncoding = TLOF.getFDEEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding(false);
   unsigned PerEncoding = TLOF.getPersonalityEncoding();
 
   char Augmentation[6] = { 0 };
@@ -168,7 +168,7 @@ void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
   unsigned LSDAEncoding = TLOF.getLSDAEncoding();
-  unsigned FDEEncoding = TLOF.getFDEEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding(false);
 
   Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
 
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 78a8743..7704340 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1048,7 +1048,7 @@ ReoptimizeBlock:
     // AnalyzeBranch.
     if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
         PrevBB.succ_size() == 1 &&
-        !MBB->hasAddressTaken()) {
+        !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
       DEBUG(dbgs() << "\nMerging into block: " << PrevBB
                    << "From MBB: " << *MBB);
       PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index d7d0e1b..2ca3859 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen
   GCStrategy.cpp
   IfConversion.cpp
   InlineSpiller.cpp
+  InterferenceCache.cpp
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 76bb3d1..e5894b8 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
 }
 
 void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
-  MachineRegisterInfo &mri = mf_.getRegInfo();
-  const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+  MachineRegisterInfo &mri = MF.getRegInfo();
+  const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
   MachineBasicBlock *mbb = 0;
   MachineLoop *loop = 0;
   unsigned loopDepth = 0;
@@ -103,6 +103,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   // Don't recompute a target specific hint.
   bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
 
+  // Don't recompute spill weight for an unspillable register.
+  bool Spillable = li.isSpillable();
+
   for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
        MachineInstr *mi = I.skipInstruction();) {
     if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
@@ -110,34 +113,37 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
     if (!visited.insert(mi))
       continue;
 
-    // Get loop info for mi.
-    if (mi->getParent() != mbb) {
-      mbb = mi->getParent();
-      loop = loops_.getLoopFor(mbb);
-      loopDepth = loop ? loop->getLoopDepth() : 0;
-      isExiting = loop ? loop->isLoopExiting(mbb) : false;
+    float weight = 1.0f;
+    if (Spillable) {
+      // Get loop info for mi.
+      if (mi->getParent() != mbb) {
+        mbb = mi->getParent();
+        loop = Loops.getLoopFor(mbb);
+        loopDepth = loop ? loop->getLoopDepth() : 0;
+        isExiting = loop ? loop->isLoopExiting(mbb) : false;
+      }
+
+      // Calculate instr weight.
+      bool reads, writes;
+      tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+      weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+      // Give extra weight to what looks like a loop induction variable update.
+      if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+        weight *= 3;
+
+      totalWeight += weight;
     }
 
-    // Calculate instr weight.
-    bool reads, writes;
-    tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
-    float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
-
-    // Give extra weight to what looks like a loop induction variable update.
-    if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
-      weight *= 3;
-
-    totalWeight += weight;
-
     // Get allocation hints from copies.
     if (noHint || !mi->isCopy())
       continue;
     unsigned hint = copyHint(mi, li.reg, tri, mri);
     if (!hint)
       continue;
-    float hweight = hint_[hint] += weight;
+    float hweight = Hint[hint] += weight;
     if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && lis_.isAllocatable(hint))
+      if (hweight > bestPhys && LIS.isAllocatable(hint))
         bestPhys = hweight, hintPhys = hint;
     } else {
       if (hweight > bestVirt)
@@ -145,15 +151,19 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
     }
   }
 
-  hint_.clear();
+  Hint.clear();
 
   // Always prefer the physreg hint.
   if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
     mri.setRegAllocationHint(li.reg, 0, hint);
-    // Weakly boost the spill weifght of hinted registers.
+    // Weakly boost the spill weight of hinted registers.
     totalWeight *= 1.01F;
   }
 
+  // If the live interval was already unspillable, leave it that way.
+  if (!Spillable)
+    return;
+
   // Mark li as unspillable if all live ranges are tiny.
   if (li.isZeroLength()) {
     li.markNotSpillable();
@@ -166,8 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   // FIXME: this gets much more complicated once we support non-trivial
   // re-materialization.
   bool isLoad = false;
-  SmallVector<LiveInterval*, 4> spillIs;
-  if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+  if (LIS.isReMaterializable(li, 0, isLoad)) {
     if (isLoad)
       totalWeight *= 0.9F;
     else
@@ -178,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
 }
 
 void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
-  MachineRegisterInfo &mri = mf_.getRegInfo();
-  const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
-  const TargetRegisterClass *orc = mri.getRegClass(reg);
-  SmallPtrSet<const TargetRegisterClass*,8> rcs;
-
-  for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
-       E = mri.reg_nodbg_end(); I != E; ++I) {
-    // The targets don't have accurate enough regclass descriptions that we can
-    // handle subregs. We need something similar to
-    // TRI::getMatchingSuperRegClass, but returning a super class instead of a
-    // sub class.
-    if (I.getOperand().getSubReg()) {
-      DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
-      return;
-    }
-    if (const TargetRegisterClass *rc =
-                                I->getDesc().getRegClass(I.getOperandNo(), tri))
-      rcs.insert(rc);
-  }
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
+  const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
 
-  // If we found no regclass constraints, just leave reg as is.
-  // In theory, we could inflate to the largest superclass of reg's existing
-  // class, but that might not be legal for the current cpu setting.
-  // This could happen if reg is only used by COPY instructions, so we may need
-  // to improve on this.
-  if (rcs.empty()) {
+  // Stop early if there is no room to grow.
+  if (NewRC == OldRC)
     return;
-  }
 
-  // Compute the intersection of all classes in rcs.
-  // This ought to be independent of iteration order, but if the target register
-  // classes don't form a proper algebra, it is possible to get different
-  // results. The solution is to make sure the intersection of any two register
-  // classes is also a register class or the null set.
-  const TargetRegisterClass *rc = 0;
-  for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
-         E = rcs.end(); I != E; ++I) {
-    rc = rc ? getCommonSubClass(rc, *I) : *I;
-    assert(rc && "Incompatible regclass constraints found");
+  // Accumulate constraints from all uses.
+  for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg),
+       E = MRI.reg_nodbg_end(); I != E; ++I) {
+    // TRI doesn't have accurate enough information to model this yet.
+    if (I.getOperand().getSubReg())
+      return;
+    const TargetRegisterClass *OpRC =
+      I->getDesc().getRegClass(I.getOperandNo(), TRI);
+    if (OpRC)
+      NewRC = getCommonSubClass(NewRC, OpRC);
+    if (!NewRC || NewRC == OldRC)
+      return;
   }
-
-  if (rc == orc)
-    return;
-  DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg)
-               << " to " << rc->getName() <<".\n");
-  mri.setRegClass(reg, rc);
+  DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg)
+               << " to " << NewRC->getName() <<".\n");
+  MRI.setRegClass(reg, NewRC);
 }
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2ad80b4..bfb6ba1 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -19,15 +19,18 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
 using namespace llvm;
 
 CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
                  SmallVector<CCValAssign, 16> &locs, LLVMContext &C)
   : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
-    TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) {
+    TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
+    CallOrPrologue(Invalid) {
   // No stack is used.
   StackOffset = 0;
   
+  clearFirstByValReg();
   UsedRegs.resize((TRI.getNumRegs()+31)/32);
 }
 
@@ -44,8 +47,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
     Size = MinSize;
   if (MinAlign > (int)Align)
     Align = MinAlign;
+  TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this), Size);
   unsigned Offset = AllocateStack(Size, Align);
-
   addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
 }
 
@@ -155,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
     if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Call result #" << i << " has unhandled type "
-             << EVT(VT).getEVTString();
+             << EVT(VT).getEVTString() << "\n";
 #endif
       llvm_unreachable(0);
     }
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 91a9536..270c337 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the pass that optimize code placement and align loop
-// headers to target specific alignment boundary.
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to target-specific alignment boundaries.
 //
 //===----------------------------------------------------------------------===//
 
@@ -40,7 +40,7 @@ namespace {
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const {
-      return "Code Placement Optimizater";
+      return "Code Placement Optimizer";
     }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
 
   // Determine a position to move orphaned loop blocks to. If TopMBB is not
   // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
-  // to the top of the loop to avoid loosing that fallthrough. Otherwise append
+  // to the top of the loop to avoid losing that fallthrough. Otherwise append
   // them to the bottom, even if it previously had a fallthrough, on the theory
   // that it's worth an extra branch to keep the loop contiguous.
   MachineFunction::iterator InsertPt =
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 0ebb5b0..34b1a39 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -93,7 +93,8 @@ namespace {
     /// with the eh.exception call. This recursively looks past instructions
     /// which don't change the EH pointer value, like casts or PHI nodes.
     bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
-                             SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
+                             SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+                             SmallPtrSet<PHINode*, 32> &SeenPHIs);
       
   public:
     static char ID; // Pass identification, replacement for typeid.
@@ -199,8 +200,8 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
 /// change the EH pointer value, like casts or PHI nodes.
 bool
 DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
-                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
-  SmallPtrSet<PHINode*, 32> SeenPHIs;
+                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
+                                    SmallPtrSet<PHINode*, 32> &SeenPHIs) {
   bool Changed = false;
 
   for (Value::use_iterator
@@ -215,11 +216,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
       if (Invoke->getCalledFunction() == URoR)
         URoRInvoke = true;
     } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
-      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
+      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
     } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
       if (SeenPHIs.insert(PN))
         // Don't process a PHI node more than once.
-        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
+        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
     }
   }
 
@@ -294,7 +295,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
 
       bool URoRInvoke = false;
       SmallPtrSet<IntrinsicInst*, 8> SelCalls;
-      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
+      SmallPtrSet<PHINode*, 32> SeenPHIs;
+      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
 
       if (URoRInvoke) {
         // This EH pointer is being used by an invoke of an URoR instruction and
@@ -437,8 +439,9 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
       if (InVal == 0) {
         // Different unwind edges have different values.  Create a new PHI node
         // in NewBB.
-        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
-                                         NewBB);
+        PHINode *NewPN = PHINode::Create(PN->getType(),
+                                         PN->getNumIncomingValues(),
+                                         PN->getName()+".unwind", NewBB);
         // Add an entry for each unwind edge, using the value from the old PHI.
         for (pred_iterator PI = PB; PI != PE; ++PI)
           NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index e08feeb..5b63468 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -173,7 +173,7 @@ namespace llvm {
     unsigned Offset;    // sh_offset - Offset from the file start
     unsigned Size;      // sh_size - The section size.
     unsigned Link;      // sh_link - Section header table index link.
-    unsigned Info;      // sh_info - Auxillary information.
+    unsigned Info;      // sh_info - Auxiliary information.
     unsigned Align;     // sh_addralign - Alignment of section.
     unsigned EntSize;   // sh_entsize - Size of entries in the section e
 
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 0fd1e8e..fa2319b 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
   // Create the object code emitter object for this target.
   ElfCE = new ELFCodeEmitter(*this);
 
-  // Inital number of sections
+  // Initial number of sections
   NumSections = 0;
 }
 
@@ -660,19 +660,21 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 /// EmitXXStructorList - Emit the ctor or dtor list.  This just emits out the 
 /// function pointers, ignoring the init priority.
 void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
-  // Should be an array of '{ int, void ()* }' structs.  The first value is the
+  // Should be an array of '{ i32, void ()* }' structs.  The first value is the
   // init priority, which we ignore.
-  if (!isa<ConstantArray>(List)) return;
+  if (List->isNullValue()) return;
   ConstantArray *InitList = cast<ConstantArray>(List);
-  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
-    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
-      if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
-
-      if (CS->getOperand(1)->isNullValue())
-        return;  // Found a null terminator, exit printing.
-      // Emit the function pointer.
-      EmitGlobalConstant(CS->getOperand(1), Xtor);
-    }
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    if (InitList->getOperand(i)->isNullValue())
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+
+    if (CS->getOperand(1)->isNullValue())
+      continue;
+
+    // Emit the function pointer.
+    EmitGlobalConstant(CS->getOperand(1), Xtor);
+  }
 }
 
 bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index aed8bc9..646e014 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
   EC.compress();
   if (ViewEdgeBundles)
     view();
+
+  // Compute the reverse mapping.
+  Blocks.clear();
+  Blocks.resize(getNumBundles());
+
+  for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+    unsigned b0 = getBundle(i, 0);
+    unsigned b1 = getBundle(i, 1);
+    Blocks[b0].push_back(i);
+    if (b1 != b0)
+      Blocks[b1].push_back(i);
+  }
+
   return false;
 }
 
@@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
   O << "}\n";
   return O;
 }
-
-
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index b5ec303..ebc2fc9 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Expand Psuedo-instructions produced by ISel. These are usually to allow
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
 // the expansion to contain control flow, such as a conditional move
 // implemented with a conditional branch and a phi, or an atomic operation
 // implemented with a loop.
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index db53b04..790200b 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -27,7 +27,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -146,10 +145,6 @@ namespace {
         : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
     };
 
-    /// Roots - Basic blocks that do not have successors. These are the starting
-    /// points of Graph traversal.
-    std::vector<MachineBasicBlock*> Roots;
-
     /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
     /// basic block number.
     std::vector<BBInfo> BBAnalysis;
@@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   MF.RenumberBlocks();
   BBAnalysis.resize(MF.getNumBlockIDs());
 
-  // Look for root nodes, i.e. blocks without successors.
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-    if (I->succ_empty())
-      Roots.push_back(I);
-
   std::vector<IfcvtToken*> Tokens;
   MadeChange = false;
   unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
@@ -406,7 +396,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   }
 
   Tokens.clear();
-  Roots.clear();
   BBAnalysis.clear();
 
   if (MadeChange && IfCvtBranchFold) {
@@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 /// candidates.
 void IfConverter::AnalyzeBlocks(MachineFunction &MF,
                                 std::vector<IfcvtToken*> &Tokens) {
-  std::set<MachineBasicBlock*> Visited;
-  for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
-    for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
-           E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
-      MachineBasicBlock *BB = *I;
-      AnalyzeBlock(BB, Tokens);
-    }
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+    AnalyzeBlock(BB, Tokens);
   }
 
   // Sort to favor more complex ifcvt scheme.
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 38e6c85..b1a33a6 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -19,41 +19,75 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-static cl::opt<bool>
-VerifySpills("verify-spills", cl::desc("Verify after each spill/split"));
-
 namespace {
 class InlineSpiller : public Spiller {
-  MachineFunctionPass &pass_;
-  MachineFunction &mf_;
-  LiveIntervals &lis_;
-  LiveStacks &lss_;
-  AliasAnalysis *aa_;
-  VirtRegMap &vrm_;
-  MachineFrameInfo &mfi_;
-  MachineRegisterInfo &mri_;
-  const TargetInstrInfo &tii_;
-  const TargetRegisterInfo &tri_;
-  const BitVector reserved_;
+  MachineFunctionPass &Pass;
+  MachineFunction &MF;
+  LiveIntervals &LIS;
+  LiveStacks &LSS;
+  AliasAnalysis *AA;
+  MachineDominatorTree &MDT;
+  MachineLoopInfo &Loops;
+  VirtRegMap &VRM;
+  MachineFrameInfo &MFI;
+  MachineRegisterInfo &MRI;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
 
   // Variables that are valid during spill(), but used by multiple methods.
-  LiveRangeEdit *edit_;
-  const TargetRegisterClass *rc_;
-  int stackSlot_;
+  LiveRangeEdit *Edit;
+  LiveInterval *StackInt;
+  int StackSlot;
+  unsigned Original;
+
+  // All registers to spill to StackSlot, including the main register.
+  SmallVector<unsigned, 8> RegsToSpill;
+
+  // All COPY instructions to/from snippets.
+  // They are ignored since both operands refer to the same stack slot.
+  SmallPtrSet<MachineInstr*, 8> SnippetCopies;
 
   // Values that failed to remat at some point.
-  SmallPtrSet<VNInfo*, 8> usedValues_;
+  SmallPtrSet<VNInfo*, 8> UsedValues;
+
+  // Information about a value that was defined by a copy from a sibling
+  // register.
+  struct SibValueInfo {
+    // True when all reaching defs were reloads: No spill is necessary.
+    bool AllDefsAreReloads;
+
+    // The preferred register to spill.
+    unsigned SpillReg;
+
+    // The value of SpillReg that should be spilled.
+    VNInfo *SpillVNI;
+
+    // A defining instruction that is not a sibling copy or a reload, or NULL.
+    // This can be used as a template for rematerialization.
+    MachineInstr *DefMI;
+
+    SibValueInfo(unsigned Reg, VNInfo *VNI)
+      : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {}
+  };
+
+  // Values in RegsToSpill defined by sibling copies.
+  typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
+  SibValueMap SibValues;
+
+  // Dead defs generated during spilling.
+  SmallVector<MachineInstr*, 8> DeadDefs;
 
   ~InlineSpiller() {}
 
@@ -61,34 +95,52 @@ public:
   InlineSpiller(MachineFunctionPass &pass,
                 MachineFunction &mf,
                 VirtRegMap &vrm)
-    : pass_(pass),
-      mf_(mf),
-      lis_(pass.getAnalysis<LiveIntervals>()),
-      lss_(pass.getAnalysis<LiveStacks>()),
-      aa_(&pass.getAnalysis<AliasAnalysis>()),
-      vrm_(vrm),
-      mfi_(*mf.getFrameInfo()),
-      mri_(mf.getRegInfo()),
-      tii_(*mf.getTarget().getInstrInfo()),
-      tri_(*mf.getTarget().getRegisterInfo()),
-      reserved_(tri_.getReservedRegs(mf_)) {}
-
-  void spill(LiveInterval *li,
-             SmallVectorImpl<LiveInterval*> &newIntervals,
-             const SmallVectorImpl<LiveInterval*> &spillIs);
+    : Pass(pass),
+      MF(mf),
+      LIS(pass.getAnalysis<LiveIntervals>()),
+      LSS(pass.getAnalysis<LiveStacks>()),
+      AA(&pass.getAnalysis<AliasAnalysis>()),
+      MDT(pass.getAnalysis<MachineDominatorTree>()),
+      Loops(pass.getAnalysis<MachineLoopInfo>()),
+      VRM(vrm),
+      MFI(*mf.getFrameInfo()),
+      MRI(mf.getRegInfo()),
+      TII(*mf.getTarget().getInstrInfo()),
+      TRI(*mf.getTarget().getRegisterInfo()) {}
 
   void spill(LiveRangeEdit &);
 
 private:
-  bool reMaterializeFor(MachineBasicBlock::iterator MI);
+  bool isSnippet(const LiveInterval &SnipLI);
+  void collectRegsToSpill();
+
+  bool isRegToSpill(unsigned Reg) {
+    return std::find(RegsToSpill.begin(),
+                     RegsToSpill.end(), Reg) != RegsToSpill.end();
+  }
+
+  bool isSibling(unsigned Reg);
+  MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
+  void analyzeSiblingValues();
+
+  bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+  void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+  void markValueUsed(LiveInterval*, VNInfo*);
+  bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
   void reMaterializeAll();
 
-  bool coalesceStackAccess(MachineInstr *MI);
+  bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
   bool foldMemoryOperand(MachineBasicBlock::iterator MI,
                          const SmallVectorImpl<unsigned> &Ops,
                          MachineInstr *LoadMI = 0);
-  void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
-  void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+  void insertReload(LiveInterval &NewLI, SlotIndex,
+                    MachineBasicBlock::iterator MI);
+  void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+                   SlotIndex, MachineBasicBlock::iterator MI);
+
+  void spillAroundUses(unsigned Reg);
+  void spillAll();
 };
 }
 
@@ -96,45 +148,489 @@ namespace llvm {
 Spiller *createInlineSpiller(MachineFunctionPass &pass,
                              MachineFunction &mf,
                              VirtRegMap &vrm) {
-  if (VerifySpills)
-    mf.verify(&pass, "When creating inline spiller");
   return new InlineSpiller(pass, mf, vrm);
 }
 }
 
+//===----------------------------------------------------------------------===//
+//                                Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
+  if (!MI->isCopy())
+    return 0;
+  if (MI->getOperand(0).getSubReg() != 0)
+    return 0;
+  if (MI->getOperand(1).getSubReg() != 0)
+    return 0;
+  if (MI->getOperand(0).getReg() == Reg)
+      return MI->getOperand(1).getReg();
+  if (MI->getOperand(1).getReg() == Reg)
+      return MI->getOperand(0).getReg();
+  return 0;
+}
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+  unsigned Reg = Edit->getReg();
+
+  // A snippet is a tiny live range with only a single instruction using it
+  // besides copies to/from Reg or spills/fills. We accept:
+  //
+  //   %snip = COPY %Reg / FILL fi#
+  //   %snip = USE %snip
+  //   %Reg = COPY %snip / SPILL %snip, fi#
+  //
+  if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+    return false;
+
+  MachineInstr *UseMI = 0;
+
+  // Check that all uses satisfy our criteria.
+  for (MachineRegisterInfo::reg_nodbg_iterator
+         RI = MRI.reg_nodbg_begin(SnipLI.reg);
+       MachineInstr *MI = RI.skipInstruction();) {
+
+    // Allow copies to/from Reg.
+    if (isFullCopyOf(MI, Reg))
+      continue;
+
+    // Allow stack slot loads.
+    int FI;
+    if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+      continue;
+
+    // Allow stack slot stores.
+    if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+      continue;
+
+    // Allow a single additional instruction.
+    if (UseMI && MI != UseMI)
+      return false;
+    UseMI = MI;
+  }
+  return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+  unsigned Reg = Edit->getReg();
+
+  // Main register always spills.
+  RegsToSpill.assign(1, Reg);
+  SnippetCopies.clear();
+
+  // Snippets all have the same original, so there can't be any for an original
+  // register.
+  if (Original == Reg)
+    return;
+
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
+       MachineInstr *MI = RI.skipInstruction();) {
+    unsigned SnipReg = isFullCopyOf(MI, Reg);
+    if (!isSibling(SnipReg))
+      continue;
+    LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+    if (!isSnippet(SnipLI))
+      continue;
+    SnippetCopies.insert(MI);
+    if (!isRegToSpill(SnipReg))
+      RegsToSpill.push_back(SnipReg);
+
+    DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//                            Sibling Values
+//===----------------------------------------------------------------------===//
+
+// After live range splitting, some values to be spilled may be defined by
+// copies from sibling registers. We trace the sibling copies back to the
+// original value if it still exists. We need it for rematerialization.
+//
+// Even when the value can't be rematerialized, we still want to determine if
+// the value has already been spilled, or we may want to hoist the spill from a
+// loop.
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+  return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           VRM.getOriginal(Reg) == Original;
+}
+
+/// traceSiblingValue - Trace a value that is about to be spilled back to the
+/// real defining instructions by looking through sibling copies. Always stay
+/// within the range of OrigVNI so the registers are known to carry the same
+/// value.
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+                                               VNInfo *OrigVNI) {
+  DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+               << UseVNI->id << '@' << UseVNI->def << '\n');
+  SmallPtrSet<VNInfo*, 8> Visited;
+  SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+  WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+  // Best spill candidate seen so far. This must dominate UseVNI.
+  SibValueInfo SVI(UseReg, UseVNI);
+  MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
+  unsigned SpillDepth = Loops.getLoopDepth(UseMBB);
+  bool SeenOrigPHI = false; // Original PHI met.
+
+  do {
+    unsigned Reg;
+    VNInfo *VNI;
+    tie(Reg, VNI) = WorkList.pop_back_val();
+    if (!Visited.insert(VNI))
+      continue;
+
+    // Is this value a better spill candidate?
+    if (!isRegToSpill(Reg)) {
+      MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+      if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+        // This is a valid spill location dominating UseVNI.
+        // Prefer to spill at a smaller loop depth.
+        unsigned Depth = Loops.getLoopDepth(MBB);
+        if (Depth < SpillDepth) {
+          DEBUG(dbgs() << "  spill depth " << Depth << ": " << PrintReg(Reg)
+                       << ':' << VNI->id << '@' << VNI->def << '\n');
+          SVI.SpillReg = Reg;
+          SVI.SpillVNI = VNI;
+          SpillDepth = Depth;
+        }
+      }
+    }
+
+    // Trace through PHI-defs created by live range splitting.
+    if (VNI->isPHIDef()) {
+      if (VNI->def == OrigVNI->def) {
+        DEBUG(dbgs() << "  orig phi value " << PrintReg(Reg) << ':'
+                     << VNI->id << '@' << VNI->def << '\n');
+        SeenOrigPHI = true;
+        continue;
+      }
+      // Get values live-out of predecessors.
+      LiveInterval &LI = LIS.getInterval(Reg);
+      MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+             PE = MBB->pred_end(); PI != PE; ++PI) {
+        VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+        if (PVNI)
+          WorkList.push_back(std::make_pair(Reg, PVNI));
+      }
+      continue;
+    }
+
+    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+    assert(MI && "Missing def");
+
+    // Trace through sibling copies.
+    if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
+      if (isSibling(SrcReg)) {
+        LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+        VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex());
+        assert(SrcVNI && "Copy from non-existing value");
+        DEBUG(dbgs() << "  copy of " << PrintReg(SrcReg) << ':'
+                     << SrcVNI->id << '@' << SrcVNI->def << '\n');
+        WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
+        continue;
+      }
+    }
+
+    // Track reachable reloads.
+    int FI;
+    if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
+      DEBUG(dbgs() << "  reload " << PrintReg(Reg) << ':'
+                   << VNI->id << "@" << VNI->def << '\n');
+      SVI.AllDefsAreReloads = true;
+      continue;
+    }
+
+    // We have an 'original' def. Don't record trivial cases.
+    if (VNI == UseVNI) {
+      DEBUG(dbgs() << "Not a sibling copy.\n");
+      return MI;
+    }
+
+    // Potential remat candidate.
+    DEBUG(dbgs() << "  def " << PrintReg(Reg) << ':'
+                 << VNI->id << '@' << VNI->def << '\t' << *MI);
+    SVI.DefMI = MI;
+  } while (!WorkList.empty());
+
+  if (SeenOrigPHI || SVI.DefMI)
+    SVI.AllDefsAreReloads = false;
+
+  DEBUG({
+    if (SVI.AllDefsAreReloads)
+      dbgs() << "All defs are reloads.\n";
+    else
+      dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':'
+             << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n';
+  });
+  SibValues.insert(std::make_pair(UseVNI, SVI));
+  return SVI.DefMI;
+}
+
+/// analyzeSiblingValues - Trace values defined by sibling copies back to
+/// something that isn't a sibling copy.
+///
+/// Keep track of values that may be rematerializable.
+void InlineSpiller::analyzeSiblingValues() {
+  SibValues.clear();
+
+  // No siblings at all?
+  if (Edit->getReg() == Original)
+    return;
+
+  LiveInterval &OrigLI = LIS.getInterval(Original);
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+    unsigned Reg = RegsToSpill[i];
+    LiveInterval &LI = LIS.getInterval(Reg);
+    for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
+         VE = LI.vni_end(); VI != VE; ++VI) {
+      VNInfo *VNI = *VI;
+      if (VNI->isUnused())
+        continue;
+      MachineInstr *DefMI = 0;
+      // Check possible sibling copies.
+      if (VNI->isPHIDef() || VNI->getCopy()) {
+        VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+        if (OrigVNI->def != VNI->def)
+          DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
+      }
+      if (!DefMI && !VNI->isPHIDef())
+        DefMI = LIS.getInstructionFromIndex(VNI->def);
+      if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+        DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
+                     << VNI->def << " may remat from " << *DefMI);
+      }
+    }
+  }
+}
+
+/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
+/// a spill at a better location.
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+  SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+  VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
+  assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+  SibValueMap::iterator I = SibValues.find(VNI);
+  if (I == SibValues.end())
+    return false;
+
+  const SibValueInfo &SVI = I->second;
+
+  // Let the normal folding code deal with the boring case.
+  if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+    return false;
+
+  // SpillReg may have been deleted by remat and DCE.
+  if (!LIS.hasInterval(SVI.SpillReg)) {
+    DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
+    SibValues.erase(I);
+    return false;
+  }
+
+  LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
+  if (!SibLI.containsValue(SVI.SpillVNI)) {
+    DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
+    SibValues.erase(I);
+    return false;
+  }
+
+  // Conservatively extend the stack slot range to the range of the original
+  // value. We may be able to do better with stack slot coloring by being more
+  // careful here.
+  assert(StackInt && "No stack slot assigned yet.");
+  LiveInterval &OrigLI = LIS.getInterval(Original);
+  VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+  StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+  DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+               << *StackInt << '\n');
+
+  // Already spilled everywhere.
+  if (SVI.AllDefsAreReloads)
+    return true;
+
+  // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+  // any later spills of the same value.
+  eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+
+  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+  MachineBasicBlock::iterator MII;
+  if (SVI.SpillVNI->isPHIDef())
+    MII = MBB->SkipPHIsAndLabels(MBB->begin());
+  else {
+    MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+    assert(DefMI && "Defining instruction disappeared");
+    MII = DefMI;
+    ++MII;
+  }
+  // Insert spill without kill flag immediately after def.
+  TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
+                          MRI.getRegClass(SVI.SpillReg), &TRI);
+  --MII; // Point to store instruction.
+  LIS.InsertMachineInstrInMaps(MII);
+  VRM.addSpillSlotUse(StackSlot, MII);
+  DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+  return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+  assert(VNI && "Missing value");
+  SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+  WorkList.push_back(std::make_pair(&SLI, VNI));
+  assert(StackInt && "No stack slot assigned yet.");
+
+  do {
+    LiveInterval *LI;
+    tie(LI, VNI) = WorkList.pop_back_val();
+    unsigned Reg = LI->reg;
+    DEBUG(dbgs() << "Checking redundant spills for "
+                 << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+    // Regs to spill are taken care of.
+    if (isRegToSpill(Reg))
+      continue;
+
+    // Add all of VNI's live range to StackInt.
+    StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+    DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+    // Find all spills and copies of VNI.
+    for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
+         MachineInstr *MI = UI.skipInstruction();) {
+      if (!MI->isCopy() && !MI->getDesc().mayStore())
+        continue;
+      SlotIndex Idx = LIS.getInstructionIndex(MI);
+      if (LI->getVNInfoAt(Idx) != VNI)
+        continue;
+
+      // Follow sibling copies down the dominator tree.
+      if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+        if (isSibling(DstReg)) {
+           LiveInterval &DstLI = LIS.getInterval(DstReg);
+           VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+           assert(DstVNI && "Missing defined value");
+           assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+           WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+        }
+        continue;
+      }
+
+      // Erase spills.
+      int FI;
+      if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+        DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+        // eliminateDeadDefs won't normally remove stores, so switch opcode.
+        MI->setDesc(TII.get(TargetOpcode::KILL));
+        DeadDefs.push_back(MI);
+      }
+    }
+  } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+//                            Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+  SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+  WorkList.push_back(std::make_pair(LI, VNI));
+  do {
+    tie(LI, VNI) = WorkList.pop_back_val();
+    if (!UsedValues.insert(VNI))
+      continue;
+
+    if (VNI->isPHIDef()) {
+      MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+             PE = MBB->pred_end(); PI != PE; ++PI) {
+        VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+        if (PVNI)
+          WorkList.push_back(std::make_pair(LI, PVNI));
+      }
+      continue;
+    }
+
+    // Follow snippet copies.
+    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+    if (!SnippetCopies.count(MI))
+      continue;
+    LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+    assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+    VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+    assert(SnipVNI && "Snippet undefined before copy");
+    WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+  } while (!WorkList.empty());
+}
+
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
-bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
-  SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
-  VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+                                     MachineBasicBlock::iterator MI) {
+  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx);
 
-  if (!OrigVNI) {
+  if (!ParentVNI) {
     DEBUG(dbgs() << "\tadding <undef> flags: ");
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg())
+      if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
         MO.setIsUndef();
     }
     DEBUG(dbgs() << UseIdx << '\t' << *MI);
     return true;
   }
 
-  LiveRangeEdit::Remat RM(OrigVNI);
-  if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) {
-    usedValues_.insert(OrigVNI);
+  if (SnippetCopies.count(MI))
+    return false;
+
+  // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+  LiveRangeEdit::Remat RM(ParentVNI);
+  SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+  if (SibI != SibValues.end())
+    RM.OrigMI = SibI->second.DefMI;
+  if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+    markValueUsed(&VirtReg, ParentVNI);
     DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
     return false;
   }
 
-  // If the instruction also writes edit_->getReg(), it had better not require
-  // the same register for uses and defs.
+  // If the instruction also writes VirtReg.reg, it had better not require the
+  // same register for uses and defs.
   bool Reads, Writes;
   SmallVector<unsigned, 8> Ops;
-  tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops);
+  tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
   if (Writes) {
     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(Ops[i]);
       if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
-        usedValues_.insert(OrigVNI);
+        markValueUsed(&VirtReg, ParentVNI);
         DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
         return false;
       }
@@ -145,35 +641,31 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
   // fold a load into the instruction. That avoids allocating a new register.
   if (RM.OrigMI->getDesc().canFoldAsLoad() &&
       foldMemoryOperand(MI, Ops, RM.OrigMI)) {
-    edit_->markRematerialized(RM.ParentVNI);
+    Edit->markRematerialized(RM.ParentVNI);
     return true;
   }
 
   // Alocate a new register for the remat.
-  LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_);
+  LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
   NewLI.markNotSpillable();
 
-  // Rematting for a copy: Set allocation hint to be the destination register.
-  if (MI->isCopy())
-    mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg());
-
   // Finally we can rematerialize OrigMI before MI.
-  SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
-                                            lis_, tii_, tri_);
+  SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+                                           LIS, TII, TRI);
   DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t'
-               << *lis_.getInstructionFromIndex(DefIdx));
+               << *LIS.getInstructionFromIndex(DefIdx));
 
   // Replace operands
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(Ops[i]);
-    if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
       MO.setReg(NewLI.reg);
       MO.setIsKill();
     }
   }
   DEBUG(dbgs() << "\t        " << UseIdx << '\t' << *MI);
 
-  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator());
+  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
   NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
   DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
   return true;
@@ -182,75 +674,85 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
 /// reMaterializeAll - Try to rematerialize as many uses as possible,
 /// and trim the live ranges after.
 void InlineSpiller::reMaterializeAll() {
-  // Do a quick scan of the interval values to find if any are remattable.
-  if (!edit_->anyRematerializable(lis_, tii_, aa_))
+  // analyzeSiblingValues has already tested all relevant defining instructions.
+  if (!Edit->anyRematerializable(LIS, TII, AA))
     return;
 
-  usedValues_.clear();
+  UsedValues.clear();
 
-  // Try to remat before all uses of edit_->getReg().
+  // Try to remat before all uses of snippets.
   bool anyRemat = false;
-  for (MachineRegisterInfo::use_nodbg_iterator
-       RI = mri_.use_nodbg_begin(edit_->getReg());
-       MachineInstr *MI = RI.skipInstruction();)
-     anyRemat |= reMaterializeFor(MI);
-
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+    unsigned Reg = RegsToSpill[i];
+    LiveInterval &LI = LIS.getInterval(Reg);
+    for (MachineRegisterInfo::use_nodbg_iterator
+         RI = MRI.use_nodbg_begin(Reg);
+         MachineInstr *MI = RI.skipInstruction();)
+      anyRemat |= reMaterializeFor(LI, MI);
+  }
   if (!anyRemat)
     return;
 
   // Remove any values that were completely rematted.
-  bool anyRemoved = false;
-  for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(),
-       E = edit_->getParent().vni_end(); I != E; ++I) {
-    VNInfo *VNI = *I;
-    if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) ||
-        usedValues_.count(VNI))
-      continue;
-    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
-    DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
-    lis_.RemoveMachineInstrFromMaps(DefMI);
-    vrm_.RemoveMachineInstrFromMaps(DefMI);
-    DefMI->eraseFromParent();
-    VNI->def = SlotIndex();
-    anyRemoved = true;
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+    unsigned Reg = RegsToSpill[i];
+    LiveInterval &LI = LIS.getInterval(Reg);
+    for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+         I != E; ++I) {
+      VNInfo *VNI = *I;
+      if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+        continue;
+      MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+      MI->addRegisterDead(Reg, &TRI);
+      if (!MI->allDefsAreDead())
+        continue;
+      DEBUG(dbgs() << "All defs dead: " << *MI);
+      DeadDefs.push_back(MI);
+    }
   }
 
-  if (!anyRemoved)
+  // Eliminate dead code after remat. Note that some snippet copies may be
+  // deleted here.
+  if (DeadDefs.empty())
     return;
-
-  // Removing values may cause debug uses where parent is not live.
-  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg());
-       MachineInstr *MI = RI.skipInstruction();) {
-    if (!MI->isDebugValue())
+  DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+  Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+
+  // Get rid of deleted and empty intervals.
+  for (unsigned i = RegsToSpill.size(); i != 0; --i) {
+    unsigned Reg = RegsToSpill[i-1];
+    if (!LIS.hasInterval(Reg)) {
+      RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
       continue;
-    // Try to preserve the debug value if parent is live immediately after it.
-    MachineBasicBlock::iterator NextMI = MI;
-    ++NextMI;
-    if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
-      SlotIndex Idx = lis_.getInstructionIndex(NextMI);
-      VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
-      if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
-        continue;
     }
-    DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
-    MI->eraseFromParent();
+    LiveInterval &LI = LIS.getInterval(Reg);
+    if (!LI.empty())
+      continue;
+    Edit->eraseVirtReg(Reg, LIS);
+    RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
   }
+  DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
 }
 
-/// If MI is a load or store of stackSlot_, it can be removed.
-bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
+
+//===----------------------------------------------------------------------===//
+//                                 Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
   int FI = 0;
-  unsigned reg;
-  if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) &&
-      !(reg = tii_.isStoreToStackSlot(MI, FI)))
+  unsigned InstrReg;
+  if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) &&
+      !(InstrReg = TII.isStoreToStackSlot(MI, FI)))
     return false;
 
   // We have a stack access. Is it the right register and slot?
-  if (reg != edit_->getReg() || FI != stackSlot_)
+  if (InstrReg != Reg || FI != StackSlot)
     return false;
 
   DEBUG(dbgs() << "Coalescing stack access: " << *MI);
-  lis_.RemoveMachineInstrFromMaps(MI);
+  LIS.RemoveMachineInstrFromMaps(MI);
   MI->eraseFromParent();
   return true;
 }
@@ -283,13 +785,13 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
   }
 
   MachineInstr *FoldMI =
-                LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI)
-                       : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+                LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
+                       : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
   if (!FoldMI)
     return false;
-  lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
+  LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
   if (!LoadMI)
-    vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+    VRM.addSpillSlotUse(StackSlot, FoldMI);
   MI->eraseFromParent();
   DEBUG(dbgs() << "\tfolded: " << *FoldMI);
   return true;
@@ -297,84 +799,40 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
 
 /// insertReload - Insert a reload of NewLI.reg before MI.
 void InlineSpiller::insertReload(LiveInterval &NewLI,
+                                 SlotIndex Idx,
                                  MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
-  SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
-  tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_);
+  TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
+                           MRI.getRegClass(NewLI.reg), &TRI);
   --MI; // Point to load instruction.
-  SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-  vrm_.addSpillSlotUse(stackSlot_, MI);
+  SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+  VRM.addSpillSlotUse(StackSlot, MI);
   DEBUG(dbgs() << "\treload:  " << LoadIdx << '\t' << *MI);
   VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
-                                       lis_.getVNInfoAllocator());
+                                       LIS.getVNInfoAllocator());
   NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
 }
 
 /// insertSpill - Insert a spill of NewLI.reg after MI.
-void InlineSpiller::insertSpill(LiveInterval &NewLI,
-                                MachineBasicBlock::iterator MI) {
+void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
+                                SlotIndex Idx, MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
-
-  // Get the defined value. It could be an early clobber so keep the def index.
-  SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
-  VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
-  assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo");
-  Idx = VNI->def;
-
-  tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
+  TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
+                          MRI.getRegClass(NewLI.reg), &TRI);
   --MI; // Point to store instruction.
-  SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-  vrm_.addSpillSlotUse(stackSlot_, MI);
+  SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
+  VRM.addSpillSlotUse(StackSlot, MI);
   DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
-  VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator());
+  VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
   NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
 }
 
-void InlineSpiller::spill(LiveInterval *li,
-                          SmallVectorImpl<LiveInterval*> &newIntervals,
-                          const SmallVectorImpl<LiveInterval*> &spillIs) {
-  LiveRangeEdit edit(*li, newIntervals, spillIs);
-  spill(edit);
-  if (VerifySpills)
-    mf_.verify(&pass_, "After inline spill");
-}
-
-void InlineSpiller::spill(LiveRangeEdit &edit) {
-  edit_ = &edit;
-  assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
-         && "Trying to spill a stack slot.");
-  DEBUG(dbgs() << "Inline spilling "
-               << mri_.getRegClass(edit.getReg())->getName()
-               << ':' << edit.getParent() << "\nFrom original "
-               << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n');
-  assert(edit.getParent().isSpillable() &&
-         "Attempting to spill already spilled value.");
-
-  reMaterializeAll();
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+  LiveInterval &OldLI = LIS.getInterval(Reg);
 
-  // Remat may handle everything.
-  if (edit_->getParent().empty())
-    return;
-
-  rc_ = mri_.getRegClass(edit.getReg());
-
-  // Share a stack slot among all descendants of Orig.
-  unsigned Orig = vrm_.getOriginal(edit.getReg());
-  stackSlot_ = vrm_.getStackSlot(Orig);
-  if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
-    stackSlot_ = vrm_.assignVirt2StackSlot(Orig);
-
-  if (Orig != edit.getReg())
-    vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_);
-
-  // Update LiveStacks now that we are committed to spilling.
-  LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_);
-  if (!stacklvr.hasAtLeastOneValue())
-    stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
-  stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0));
-
-  // Iterate over instructions using register.
-  for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg());
+  // Iterate over instructions using Reg.
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
        MachineInstr *MI = RI.skipInstruction();) {
 
     // Debug values are not allowed to affect codegen.
@@ -383,7 +841,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
       uint64_t Offset = MI->getOperand(1).getImm();
       const MDNode *MDPtr = MI->getOperand(2).getMetadata();
       DebugLoc DL = MI->getDebugLoc();
-      if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_,
+      if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot,
                                                            Offset, MDPtr, DL)) {
         DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
         MachineBasicBlock *MBB = MI->getParent();
@@ -395,14 +853,44 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
       continue;
     }
 
+    // Ignore copies to/from snippets. We'll delete them.
+    if (SnippetCopies.count(MI))
+      continue;
+
     // Stack slot accesses may coalesce away.
-    if (coalesceStackAccess(MI))
+    if (coalesceStackAccess(MI, Reg))
       continue;
 
     // Analyze instruction.
     bool Reads, Writes;
     SmallVector<unsigned, 8> Ops;
-    tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops);
+    tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+    // Find the slot index where this instruction reads and writes OldLI.
+    // This is usually the def slot, except for tied early clobbers.
+    SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+    if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+      if (SlotIndex::isSameInstr(Idx, VNI->def))
+        Idx = VNI->def;
+
+    // Check for a sibling copy.
+    unsigned SibReg = isFullCopyOf(MI, Reg);
+    if (SibReg && isSibling(SibReg)) {
+      if (Writes) {
+        // Hoist the spill of a sib-reg copy.
+        if (hoistSpill(OldLI, MI)) {
+          // This COPY is now dead, the value is already in the stack slot.
+          MI->getOperand(0).setIsDead();
+          DeadDefs.push_back(MI);
+          continue;
+        }
+      } else {
+        // This is a reload for a sib-reg copy. Drop spills downstream.
+        LiveInterval &SibLI = LIS.getInterval(SibReg);
+        eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+        // The COPY will fold to a reload below.
+      }
+    }
 
     // Attempt to fold memory ops.
     if (foldMemoryOperand(MI, Ops))
@@ -410,11 +898,11 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
 
     // Allocate interval around instruction.
     // FIXME: Infer regclass from instruction alone.
-    LiveInterval &NewLI = edit.create(mri_, lis_, vrm_);
+    LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
     NewLI.markNotSpillable();
 
     if (Reads)
-      insertReload(NewLI, MI);
+      insertReload(NewLI, Idx, MI);
 
     // Rewrite instruction operands.
     bool hasLiveDef = false;
@@ -429,11 +917,84 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
           hasLiveDef = true;
       }
     }
+    DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
 
     // FIXME: Use a second vreg if instruction has no tied ops.
     if (Writes && hasLiveDef)
-      insertSpill(NewLI, MI);
+      insertSpill(NewLI, OldLI, Idx, MI);
 
     DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
   }
 }
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+  // Update LiveStacks now that we are committed to spilling.
+  if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+    StackSlot = VRM.assignVirt2StackSlot(Original);
+    StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+    StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+  } else
+    StackInt = &LSS.getInterval(StackSlot);
+
+  if (Original != Edit->getReg())
+    VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+  assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+    StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
+                                   StackInt->getValNumInfo(0));
+  DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+  // Spill around uses of all RegsToSpill.
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+    spillAroundUses(RegsToSpill[i]);
+
+  // Hoisted spills may cause dead code.
+  if (!DeadDefs.empty()) {
+    DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+    Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+  }
+
+  // Finally delete the SnippetCopies.
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg());
+       MachineInstr *MI = RI.skipInstruction();) {
+    assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+    // FIXME: Do this with a LiveRangeEdit callback.
+    VRM.RemoveMachineInstrFromMaps(MI);
+    LIS.RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+  }
+
+  // Delete all spilled registers.
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
+    Edit->eraseVirtReg(RegsToSpill[i], LIS);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+  Edit = &edit;
+  assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+         && "Trying to spill a stack slot.");
+  // Share a stack slot among all descendants of Original.
+  Original = VRM.getOriginal(edit.getReg());
+  StackSlot = VRM.getStackSlot(Original);
+  StackInt = 0;
+
+  DEBUG(dbgs() << "Inline spilling "
+               << MRI.getRegClass(edit.getReg())->getName()
+               << ':' << edit.getParent() << "\nFrom original "
+               << LIS.getInterval(Original) << '\n');
+  assert(edit.getParent().isSpillable() &&
+         "Attempting to spill already spilled value.");
+  assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+  collectRegsToSpill();
+  analyzeSiblingValues();
+  reMaterializeAll();
+
+  // Remat may handle everything.
+  if (!RegsToSpill.empty())
+    spillAll();
+
+  Edit->calculateRegClassAndHint(MF, LIS, Loops);
+}
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 0000000..b1014a9
--- /dev/null
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,155 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "InterferenceCache.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+void InterferenceCache::init(MachineFunction *mf,
+                             LiveIntervalUnion *liuarray,
+                             SlotIndexes *indexes,
+                            const TargetRegisterInfo *tri) {
+  MF = mf;
+  LIUArray = liuarray;
+  TRI = tri;
+  PhysRegEntries.assign(TRI->getNumRegs(), 0);
+  for (unsigned i = 0; i != CacheEntries; ++i)
+    Entries[i].clear(mf, indexes);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+  unsigned E = PhysRegEntries[PhysReg];
+  if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+    if (!Entries[E].valid(LIUArray, TRI))
+      Entries[E].revalidate();
+    return &Entries[E];
+  }
+  // No valid entry exists, pick the next round-robin entry.
+  E = RoundRobin;
+  if (++RoundRobin == CacheEntries)
+    RoundRobin = 0;
+  Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+  PhysRegEntries[PhysReg] = E;
+  return &Entries[E];
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate() {
+  // Invalidate all block entries.
+  ++Tag;
+  // Invalidate all iterators.
+  PrevPos = SlotIndex();
+  for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
+    Aliases[i].second = Aliases[i].first->getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+                                     LiveIntervalUnion *LIUArray,
+                                     const TargetRegisterInfo *TRI,
+                                     const MachineFunction *MF) {
+  // LIU's changed, invalidate cache.
+  ++Tag;
+  PhysReg = physReg;
+  Blocks.resize(MF->getNumBlockIDs());
+  Aliases.clear();
+  for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+    LiveIntervalUnion *LIU = LIUArray + *AS;
+    Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
+  }
+
+  // Reset iterators.
+  PrevPos = SlotIndex();
+  unsigned e = Aliases.size();
+  Iters.resize(e);
+  for (unsigned i = 0; i != e; ++i)
+    Iters[i].setMap(Aliases[i].first->getMap());
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+                                     const TargetRegisterInfo *TRI) {
+  unsigned i = 0, e = Aliases.size();
+  for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+    LiveIntervalUnion *LIU = LIUArray + *AS;
+    if (i == e ||  Aliases[i].first != LIU)
+      return false;
+    if (LIU->changedSince(Aliases[i].second))
+      return false;
+  }
+  return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+  SlotIndex Start, Stop;
+  tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+  // Use advanceTo only when possible.
+  if (PrevPos != Start) {
+    if (!PrevPos.isValid() || Start < PrevPos)
+      for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+        Iters[i].find(Start);
+    else
+      for (unsigned i = 0, e = Iters.size(); i != e; ++i)
+        Iters[i].advanceTo(Start);
+    PrevPos = Start;
+  }
+
+  MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
+  BlockInterference *BI = &Blocks[MBBNum];
+  for (;;) {
+    BI->Tag = Tag;
+    BI->First = BI->Last = SlotIndex();
+
+    // Check for first interference.
+    for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+      Iter &I = Iters[i];
+      if (!I.valid())
+        continue;
+      SlotIndex StartI = I.start();
+      if (StartI >= Stop)
+        continue;
+      if (!BI->First.isValid() || StartI < BI->First)
+        BI->First = StartI;
+    }
+
+    PrevPos = Stop;
+    if (BI->First.isValid())
+      break;
+
+    // No interference in this block? Go ahead and precompute the next block.
+    if (++MFI == MF->end())
+      return;
+    MBBNum = MFI->getNumber();
+    BI = &Blocks[MBBNum];
+    if (BI->Tag == Tag)
+      return;
+    tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+  }
+
+  // Check for last interference in block.
+  for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
+    Iter &I = Iters[i];
+    if (!I.valid() || I.start() >= Stop)
+      continue;
+    I.advanceTo(Stop);
+    bool Backup = !I.valid() || I.start() >= Stop;
+    if (Backup)
+      --I;
+    SlotIndex StopI = I.stop();
+    if (!BI->Last.isValid() || StopI > BI->Last)
+      BI->Last = StopI;
+    if (Backup)
+      ++I;
+  }
+}
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 0000000..6c36fa4
--- /dev/null
+++ b/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,163 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INTERFERENCECACHE
+#define LLVM_CODEGEN_INTERFERENCECACHE
+
+#include "LiveIntervalUnion.h"
+
+namespace llvm {
+
+class InterferenceCache {
+  const TargetRegisterInfo *TRI;
+  LiveIntervalUnion *LIUArray;
+  SlotIndexes *Indexes;
+  MachineFunction *MF;
+
+  /// BlockInterference - information about the interference in a single basic
+  /// block.
+  struct BlockInterference {
+    BlockInterference() : Tag(0) {}
+    unsigned Tag;
+    SlotIndex First;
+    SlotIndex Last;
+  };
+
+  /// Entry - A cache entry containing interference information for all aliases
+  /// of PhysReg in all basic blocks.
+  class Entry {
+    /// PhysReg - The register currently represented.
+    unsigned PhysReg;
+
+    /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+    /// change.
+    unsigned Tag;
+
+    /// MF - The current function.
+    MachineFunction *MF;
+
+    /// Indexes - Mapping block numbers to SlotIndex ranges.
+    SlotIndexes *Indexes;
+
+    /// PrevPos - The previous position the iterators were moved to.
+    SlotIndex PrevPos;
+
+    /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of
+    /// PhysReg.
+    SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+
+    typedef LiveIntervalUnion::SegmentIter Iter;
+
+    /// Iters - an iterator for each alias
+    SmallVector<Iter, 8> Iters;
+
+    /// Blocks - Interference for each block in the function.
+    SmallVector<BlockInterference, 8> Blocks;
+
+    /// update - Recompute Blocks[MBBNum]
+    void update(unsigned MBBNum);
+
+  public:
+    Entry() : PhysReg(0), Tag(0), Indexes(0) {}
+
+    void clear(MachineFunction *mf, SlotIndexes *indexes) {
+      PhysReg = 0;
+      MF = mf;
+      Indexes = indexes;
+    }
+
+    unsigned getPhysReg() const { return PhysReg; }
+
+    void revalidate();
+
+    /// valid - Return true if this is a valid entry for physReg.
+    bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+    /// reset - Initialize entry to represent physReg's aliases.
+    void reset(unsigned physReg,
+               LiveIntervalUnion *LIUArray,
+               const TargetRegisterInfo *TRI,
+               const MachineFunction *MF);
+
+    /// get - Return an up to date BlockInterference.
+    BlockInterference *get(unsigned MBBNum) {
+      if (Blocks[MBBNum].Tag != Tag)
+        update(MBBNum);
+      return &Blocks[MBBNum];
+    }
+  };
+
+  // We don't keep a cache entry for every physical register, that would use too
+  // much memory. Instead, a fixed number of cache entries are used in a round-
+  // robin manner.
+  enum { CacheEntries = 32 };
+
+  // Point to an entry for each physreg. The entry pointed to may not be up to
+  // date, and it may have been reused for a different physreg.
+  SmallVector<unsigned char, 2> PhysRegEntries;
+
+  // Next round-robin entry to be picked.
+  unsigned RoundRobin;
+
+  // The actual cache entries.
+  Entry Entries[CacheEntries];
+
+  // get - Get a valid entry for PhysReg.
+  Entry *get(unsigned PhysReg);
+
+public:
+  InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+
+  /// init - Prepare cache for a new function.
+  void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+            const TargetRegisterInfo *);
+
+  /// Cursor - The primary query interface for the block interference cache.
+  class Cursor {
+    Entry *CacheEntry;
+    BlockInterference *Current;
+  public:
+    /// Cursor - Create a cursor for the interference allocated to PhysReg and
+    /// all its aliases.
+    Cursor(InterferenceCache &Cache, unsigned PhysReg)
+      : CacheEntry(Cache.get(PhysReg)), Current(0) {}
+
+    /// moveTo - Move cursor to basic block MBBNum.
+    void moveToBlock(unsigned MBBNum) {
+      Current = CacheEntry->get(MBBNum);
+    }
+
+    /// hasInterference - Return true if the current block has any interference.
+    bool hasInterference() {
+      return Current->First.isValid();
+    }
+
+    /// first - Return the starting index of the first interfering range in the
+    /// current block.
+    SlotIndex first() {
+      return Current->First;
+    }
+
+    /// last - Return the ending index of the last interfering range in the
+    /// current block.
+    SlotIndex last() {
+      return Current->Last;
+    }
+  };
+
+  friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 80dfc76..e1dad2e 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -98,12 +98,6 @@ static cl::opt<cl::boolOrDefault>
 EnableFastISelOption("fast-isel", cl::Hidden,
   cl::desc("Enable the \"fast\" instruction selector"));
 
-// Enable or disable an experimental optimization to split GEPs
-// and run a special GVN pass which does not examine loads, in
-// an effort to factor out redundancy implicit in complex GEPs.
-static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
-    cl::desc("Split GEPs and run no-load GVN"));
-
 LLVMTargetMachine::LLVMTargetMachine(const Target &T,
                                      const std::string &Triple)
   : TargetMachine(T), TargetTriple(Triple) {
@@ -132,6 +126,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     return true;
   assert(Context != 0 && "Failed to get MCContext");
 
+  if (hasMCSaveTempLabels())
+    Context->setAllowTemporaryLabels(false);
+
   const MCAsmInfo &MAI = *getMCAsmInfo();
   OwningPtr<MCStreamer> AsmStreamer;
 
@@ -139,7 +136,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   default: return true;
   case CGFT_AssemblyFile: {
     MCInstPrinter *InstPrinter =
-      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+      getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI);
 
     // Create a code emitter if asked to show the encoding.
     MCCodeEmitter *MCE = 0;
@@ -152,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
                                                   getVerboseAsm(),
                                                   hasMCUseLoc(),
+                                                  hasMCUseCFI(),
                                                   InstPrinter,
                                                   MCE, TAB,
                                                   ShowMCInst);
@@ -230,11 +228,40 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
 ///
 bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
                                           MCContext *&Ctx,
+                                          raw_ostream &Out,
                                           CodeGenOpt::Level OptLevel,
                                           bool DisableVerify) {
   // Add common CodeGen passes.
   if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
     return true;
+
+  if (hasMCSaveTempLabels())
+    Ctx->setAllowTemporaryLabels(false);
+
+  // Create the code emitter for the target if it exists.  If not, .o file
+  // emission fails.
+  MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx);
+  TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+  if (MCE == 0 || TAB == 0)
+    return true;
+
+  OwningPtr<MCStreamer> AsmStreamer;
+  AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx,
+                                                     *TAB, Out, MCE,
+                                                     hasMCRelaxAll(),
+                                                     hasMCNoExecStack()));
+  AsmStreamer.get()->InitSections();
+
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+  if (Printer == 0)
+    return true;
+
+  // If successful, createAsmPrinter took ownership of AsmStreamer.
+  AsmStreamer.take();
+
+  PM.add(Printer);
+
   // Make sure the code model is set.
   setCodeModelForJIT();
 
@@ -272,12 +299,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (!DisableVerify)
     PM.add(createVerifierPass());
 
-  // Optionally, tun split-GEPs and no-load GVN.
-  if (EnableSplitGEPGVN) {
-    PM.add(createGEPSplitterPass());
-    PM.add(createGVNPass(/*NoLoads=*/true));
-  }
-
   // Run loop strength reduction before anything else.
   if (OptLevel != CodeGenOpt::None && !DisableLSR) {
     PM.add(createLoopStrengthReducePass(getTargetLowering()));
@@ -304,6 +325,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     // FALLTHROUGH
   case ExceptionHandling::DwarfCFI:
   case ExceptionHandling::DwarfTable:
+  case ExceptionHandling::ARM:
     PM.add(createDwarfEHPass(this));
     break;
   case ExceptionHandling::None:
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 853ec1a..8b21483 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -76,6 +77,7 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
 /// held by the same virtual register. The equivalence class is the transitive
 /// closure of that relation.
 namespace {
+class LDVImpl;
 class UserValue {
   const MDNode *variable; ///< The debug info variable we are part of.
   unsigned offset;        ///< Byte offset into variable.
@@ -99,10 +101,6 @@ class UserValue {
   void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
                         LiveIntervals &LIS, const TargetInstrInfo &TII);
 
-  /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx.
-  void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
-                       LiveIntervals &LIS, const TargetInstrInfo &TII);
-
 public:
   /// UserValue - Create a new UserValue.
   UserValue(const MDNode *var, unsigned o, DebugLoc L, 
@@ -146,17 +144,31 @@ public:
 
   /// getLocationNo - Return the location number that matches Loc.
   unsigned getLocationNo(const MachineOperand &LocMO) {
-    if (LocMO.isReg() && LocMO.getReg() == 0)
-      return ~0u;
-    for (unsigned i = 0, e = locations.size(); i != e; ++i)
-      if (LocMO.isIdenticalTo(locations[i]))
-        return i;
+    if (LocMO.isReg()) {
+      if (LocMO.getReg() == 0)
+        return ~0u;
+      // For register locations we dont care about use/def and other flags.
+      for (unsigned i = 0, e = locations.size(); i != e; ++i)
+        if (locations[i].isReg() &&
+            locations[i].getReg() == LocMO.getReg() &&
+            locations[i].getSubReg() == LocMO.getSubReg())
+          return i;
+    } else
+      for (unsigned i = 0, e = locations.size(); i != e; ++i)
+        if (LocMO.isIdenticalTo(locations[i]))
+          return i;
     locations.push_back(LocMO);
     // We are storing a MachineOperand outside a MachineInstr.
     locations.back().clearParent();
+    // Don't store def operands.
+    if (locations.back().isReg())
+      locations.back().setIsUse();
     return locations.size() - 1;
   }
 
+  /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+  void mapVirtRegs(LDVImpl *LDV);
+
   /// addDef - Add a definition point to this value.
   void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
     // Add a singular (Idx,Idx) -> Loc mapping.
@@ -168,19 +180,36 @@ public:
   /// extendDef - Extend the current definition as far as possible down the
   /// dominator tree. Stop when meeting an existing def or when leaving the live
   /// range of VNI.
+  /// End points where VNI is no longer live are added to Kills.
   /// @param Idx   Starting point for the definition.
   /// @param LocNo Location number to propagate.
   /// @param LI    Restrict liveness to where LI has the value VNI. May be null.
   /// @param VNI   When LI is not null, this is the value to restrict to.
+  /// @param Kills Append end points of VNI's live range to Kills.
   /// @param LIS   Live intervals analysis.
   /// @param MDT   Dominator tree.
   void extendDef(SlotIndex Idx, unsigned LocNo,
                  LiveInterval *LI, const VNInfo *VNI,
+                 SmallVectorImpl<SlotIndex> *Kills,
                  LiveIntervals &LIS, MachineDominatorTree &MDT);
 
+  /// addDefsFromCopies - The value in LI/LocNo may be copies to other
+  /// registers. Determine if any of the copies are available at the kill
+  /// points, and add defs if possible.
+  /// @param LI      Scan for copies of the value in LI->reg.
+  /// @param LocNo   Location number of LI->reg.
+  /// @param Kills   Points where the range of LocNo could be extended.
+  /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+  void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+                      const SmallVectorImpl<SlotIndex> &Kills,
+                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+                      MachineRegisterInfo &MRI,
+                      LiveIntervals &LIS);
+
   /// computeIntervals - Compute the live intervals of all locations after
   /// collecting all their def points.
-  void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+  void computeIntervals(MachineRegisterInfo &MRI,
+                        LiveIntervals &LIS, MachineDominatorTree &MDT);
 
   /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
   void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
@@ -230,9 +259,6 @@ class LDVImpl {
   /// lookupVirtReg - Find the EC leader for VirtReg or null.
   UserValue *lookupVirtReg(unsigned VirtReg);
 
-  /// mapVirtReg - Map virtual register to an equivalence class.
-  void mapVirtReg(unsigned VirtReg, UserValue *EC);
-
   /// handleDebugValue - Add DBG_VALUE instruction to our maps.
   /// @param MI  DBG_VALUE instruction
   /// @param Idx Last valid SLotIndex before instruction.
@@ -261,7 +287,10 @@ public:
     userVarMap.clear();
   }
 
-  /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx.
+  /// mapVirtReg - Map virtual register to an equivalence class.
+  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+  /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
   void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
 
   /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
@@ -322,6 +351,13 @@ void UserValue::coalesceLocation(unsigned LocNo) {
   }
 }
 
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+  for (unsigned i = 0, e = locations.size(); i != e; ++i)
+    if (locations[i].isReg() &&
+        TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+      LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
 UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
                                  DebugLoc DL) {
   UserValue *&Leader = userVarMap[Var];
@@ -363,14 +399,6 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
   unsigned Offset = MI->getOperand(1).getImm();
   const MDNode *Var = MI->getOperand(2).getMetadata();
   UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
-
-  // If the location is a virtual register, make sure it is mapped.
-  if (MI->getOperand(0).isReg()) {
-    unsigned Reg = MI->getOperand(0).getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg))
-      mapVirtReg(Reg, UV);
-  }
-
   UV->addDef(Idx, MI->getOperand(0));
   return true;
 }
@@ -405,6 +433,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
 
 void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
                           LiveInterval *LI, const VNInfo *VNI,
+                          SmallVectorImpl<SlotIndex> *Kills,
                           LiveIntervals &LIS, MachineDominatorTree &MDT) {
   SmallVector<SlotIndex, 16> Todo;
   Todo.push_back(Idx);
@@ -419,8 +448,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
     bool ToEnd = true;
     if (LI && VNI) {
       LiveRange *Range = LI->getLiveRangeContaining(Start);
-      if (!Range || Range->valno != VNI)
+      if (!Range || Range->valno != VNI) {
+        if (Kills)
+          Kills->push_back(Start);
         continue;
+      }
       if (Range->end < Stop)
         Stop = Range->end, ToEnd = false;
     }
@@ -438,6 +470,9 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
     // Limited by the next def.
     if (I.valid() && I.start() < Stop)
       Stop = I.start(), ToEnd = false;
+    // Limited by VNI's live range.
+    else if (!ToEnd && Kills)
+      Kills->push_back(Stop);
 
     if (Start >= Stop)
       continue;
@@ -455,7 +490,82 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
 }
 
 void
-UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+                      const SmallVectorImpl<SlotIndex> &Kills,
+                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+                      MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+  if (Kills.empty())
+    return;
+  // Don't track copies from physregs, there are too many uses.
+  if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+    return;
+
+  // Collect all the (vreg, valno) pairs that are copies of LI.
+  SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         UI = MRI.use_nodbg_begin(LI->reg),
+         UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+    // Copies of the full value.
+    if (UI.getOperand().getSubReg() || !UI->isCopy())
+      continue;
+    MachineInstr *MI = &*UI;
+    unsigned DstReg = MI->getOperand(0).getReg();
+
+    // Don't follow copies to physregs. These are usually setting up call
+    // arguments, and the argument registers are always call clobbered. We are
+    // better off in the source register which could be a callee-saved register,
+    // or it could be spilled.
+    if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+      continue;
+
+    // Is LocNo extended to reach this copy? If not, another def may be blocking
+    // it, or we are looking at a wrong value of LI.
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    LocMap::iterator I = locInts.find(Idx.getUseIndex());
+    if (!I.valid() || I.value() != LocNo)
+      continue;
+
+    if (!LIS.hasInterval(DstReg))
+      continue;
+    LiveInterval *DstLI = &LIS.getInterval(DstReg);
+    const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
+    assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+    CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+  }
+
+  if (CopyValues.empty())
+    return;
+
+  DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+  // Try to add defs of the copied values for each kill point.
+  for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+    SlotIndex Idx = Kills[i];
+    for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+      LiveInterval *DstLI = CopyValues[j].first;
+      const VNInfo *DstVNI = CopyValues[j].second;
+      if (DstLI->getVNInfoAt(Idx) != DstVNI)
+        continue;
+      // Check that there isn't already a def at Idx
+      LocMap::iterator I = locInts.find(Idx);
+      if (I.valid() && I.start() <= Idx)
+        continue;
+      DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+                   << DstVNI->id << " in " << *DstLI << '\n');
+      MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+      assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+      unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+      I.insert(Idx, Idx.getNextSlot(), LocNo);
+      NewDefs.push_back(std::make_pair(Idx, LocNo));
+      break;
+    }
+  }
+}
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+                            LiveIntervals &LIS,
+                            MachineDominatorTree &MDT) {
   SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
 
   // Collect all defs to be extended (Skipping undefs).
@@ -463,7 +573,8 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
     if (I.value() != ~0u)
       Defs.push_back(std::make_pair(I.start(), I.value()));
 
-  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+  // Extend all defs, and possibly add new ones along the way.
+  for (unsigned i = 0; i != Defs.size(); ++i) {
     SlotIndex Idx = Defs[i].first;
     unsigned LocNo = Defs[i].second;
     const MachineOperand &Loc = locations[LocNo];
@@ -472,9 +583,11 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
     if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
       LiveInterval *LI = &LIS.getInterval(Loc.getReg());
       const VNInfo *VNI = LI->getVNInfoAt(Idx);
-      extendDef(Idx, LocNo, LI, VNI, LIS, MDT);
+      SmallVector<SlotIndex, 16> Kills;
+      extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT);
+      addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
     } else
-      extendDef(Idx, LocNo, 0, 0, LIS, MDT);
+      extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT);
   }
 
   // Finally, erase all the undefs.
@@ -486,8 +599,10 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
 }
 
 void LDVImpl::computeIntervals() {
-  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
-    userValues[i]->computeIntervals(*LIS, *MDT);
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+    userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT);
+    userValues[i]->mapVirtRegs(this);
+  }
 }
 
 bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
@@ -640,13 +755,6 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
     .addOperand(Loc).addImm(offset).addMetadata(variable);
 }
 
-void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
-                               LiveIntervals &LIS, const TargetInstrInfo &TII) {
-  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
-  BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0)
-    .addImm(offset).addMetadata(variable);
-}
-
 void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
                                 const TargetInstrInfo &TII) {
   MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
@@ -678,12 +786,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
       break;
 
     ++I;
-    if (Stop == MBBEnd)
-      continue;
-    // The current interval ends before MBB.
-    // Insert a kill if there is a gap.
-    if (!I.valid() || I.start() > Stop)
-      insertDebugKill(MBB, Stop, LIS, TII);
   }
 }
 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index c2dbd6a..cfade24 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -30,19 +30,22 @@
 #include <algorithm>
 using namespace llvm;
 
-// CompEnd - Compare LiveRange ends.
-namespace {
-struct CompEnd {
-  bool operator()(const LiveRange &A, const LiveRange &B) const {
-    return A.end < B.end;
-  }
-};
-}
-
 LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
-  assert(Pos.isValid() && "Cannot search for an invalid index");
-  return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0),
-                          CompEnd());
+  // This algorithm is basically std::upper_bound.
+  // Unfortunately, std::upper_bound cannot be used with mixed types until we
+  // adopt C++0x. Many libraries can do it, but not all.
+  if (empty() || Pos >= endIndex())
+    return end();
+  iterator I = begin();
+  size_t Len = ranges.size();
+  do {
+    size_t Mid = Len >> 1;
+    if (Pos < I[Mid].end)
+      Len = Mid;
+    else
+      I += Mid + 1, Len -= Mid + 1;
+  } while (Len);
+  return I;
 }
 
 /// killedInRange - Return true if the interval has kills in [Start,End).
@@ -291,6 +294,22 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
   return ranges.insert(it, LR);
 }
 
+/// extendInBlock - If this interval is live before UseIdx in the basic
+/// block that starts at StartIdx, extend it to be live at UseIdx and return
+/// the value. If there is no live range before UseIdx, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) {
+  if (empty())
+    return 0;
+  iterator I = std::upper_bound(begin(), end(), UseIdx);
+  if (I == begin())
+    return 0;
+  --I;
+  if (I->end <= StartIdx)
+    return 0;
+  if (I->end <= UseIdx)
+    extendIntervalEndTo(I, UseIdx.getNextSlot());
+  return I->valno;
+}
 
 /// removeRange - Remove the specified range from this interval.  Note that
 /// the range must be in a single LiveRange in its entirety.
@@ -476,60 +495,19 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
 void LiveInterval::MergeValueInAsValue(
                                     const LiveInterval &RHS,
                                     const VNInfo *RHSValNo, VNInfo *LHSValNo) {
-  SmallVector<VNInfo*, 4> ReplacedValNos;
-  iterator IP = begin();
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
   for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
-    assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
     if (I->valno != RHSValNo)
       continue;
-    SlotIndex Start = I->start, End = I->end;
-    IP = std::upper_bound(IP, end(), Start);
-    // If the start of this range overlaps with an existing liverange, trim it.
-    if (IP != begin() && IP[-1].end > Start) {
-      if (IP[-1].valno != LHSValNo) {
-        ReplacedValNos.push_back(IP[-1].valno);
-        IP[-1].valno = LHSValNo; // Update val#.
-      }
-      Start = IP[-1].end;
-      // Trimmed away the whole range?
-      if (Start >= End) continue;
-    }
-    // If the end of this range overlaps with an existing liverange, trim it.
-    if (IP != end() && End > IP->start) {
-      if (IP->valno != LHSValNo) {
-        ReplacedValNos.push_back(IP->valno);
-        IP->valno = LHSValNo;  // Update val#.
-      }
-      End = IP->start;
-      // If this trimmed away the whole range, ignore it.
-      if (Start == End) continue;
-    }
-
     // Map the valno in the other live range to the current live range.
-    IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
-  }
-
-
-  SmallSet<VNInfo*, 4> Seen;
-  for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
-    VNInfo *V1 = ReplacedValNos[i];
-    if (Seen.insert(V1)) {
-      bool isDead = true;
-      for (const_iterator I = begin(), E = end(); I != E; ++I)
-        if (I->valno == V1) {
-          isDead = false;
-          break;
-        }
-      if (isDead) {
-        // Now that V1 is dead, remove it.
-        markValNoForDeletion(V1);
-      }
-    }
+    LiveRange Tmp = *I;
+    Tmp.valno = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
   }
 }
 
 
-
 /// MergeValueNumberInto - This method is called when two value nubmers
 /// are found to be equivalent.  This eliminates V1, replacing all
 /// LiveRanges with the V1 value number with the V2 value number.  This can
@@ -700,8 +678,8 @@ void LiveRange::print(raw_ostream &os) const {
 
 unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
   // Create initial equivalence classes.
-  eqClass_.clear();
-  eqClass_.grow(LI->getNumValNums());
+  EqClass.clear();
+  EqClass.grow(LI->getNumValNums());
 
   const VNInfo *used = 0, *unused = 0;
 
@@ -712,48 +690,65 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
     // Group all unused values into one class.
     if (VNI->isUnused()) {
       if (unused)
-        eqClass_.join(unused->id, VNI->id);
+        EqClass.join(unused->id, VNI->id);
       unused = VNI;
       continue;
     }
     used = VNI;
     if (VNI->isPHIDef()) {
-      const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def);
+      const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
       assert(MBB && "Phi-def has no defining MBB");
       // Connect to values live out of predecessors.
       for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
            PE = MBB->pred_end(); PI != PE; ++PI)
         if (const VNInfo *PVNI =
-              LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot()))
-          eqClass_.join(VNI->id, PVNI->id);
+              LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+          EqClass.join(VNI->id, PVNI->id);
     } else {
       // Normal value defined by an instruction. Check for two-addr redef.
       // FIXME: This could be coincidental. Should we really check for a tied
       // operand constraint?
       // Note that VNI->def may be a use slot for an early clobber def.
       if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
-        eqClass_.join(VNI->id, UVNI->id);
+        EqClass.join(VNI->id, UVNI->id);
     }
   }
 
   // Lump all the unused values in with the last used value.
   if (used && unused)
-    eqClass_.join(used->id, unused->id);
+    EqClass.join(used->id, unused->id);
 
-  eqClass_.compress();
-  return eqClass_.getNumClasses();
+  EqClass.compress();
+  return EqClass.getNumClasses();
 }
 
-void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) {
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
+                                          MachineRegisterInfo &MRI) {
   assert(LIV[0] && "LIV[0] must be set");
   LiveInterval &LI = *LIV[0];
 
-  // First move runs to new intervals.
+  // Rewrite instructions.
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+       RE = MRI.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    if (MO.isUse() && MO.isUndef())
+      continue;
+    // DBG_VALUE instructions should have been eliminated earlier.
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+    const VNInfo *VNI = LI.getVNInfoAt(Idx);
+    assert(VNI && "Interval not live at use.");
+    MO.setReg(LIV[getEqClass(VNI)]->reg);
+  }
+
+  // Move runs to new intervals.
   LiveInterval::iterator J = LI.begin(), E = LI.end();
-  while (J != E && eqClass_[J->valno->id] == 0)
+  while (J != E && EqClass[J->valno->id] == 0)
     ++J;
   for (LiveInterval::iterator I = J; I != E; ++I) {
-    if (unsigned eq = eqClass_[I->valno->id]) {
+    if (unsigned eq = EqClass[I->valno->id]) {
       assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
              "New intervals should be empty");
       LIV[eq]->ranges.push_back(*I);
@@ -764,11 +759,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) {
 
   // Transfer VNInfos to their new owners and renumber them.
   unsigned j = 0, e = LI.getNumValNums();
-  while (j != e && eqClass_[j] == 0)
+  while (j != e && EqClass[j] == 0)
     ++j;
   for (unsigned i = j; i != e; ++i) {
     VNInfo *VNI = LI.getValNumInfo(i);
-    if (unsigned eq = eqClass_[i]) {
+    if (unsigned eq = EqClass[i]) {
       VNI->id = LIV[eq]->getNumValNums();
       LIV[eq]->valnos.push_back(VNI);
     } else {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index aef5b5f..9257191 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -572,19 +572,12 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
   if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
     handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
                              getOrCreateInterval(MO.getReg()));
-  else if (allocatableRegs_[MO.getReg()]) {
+  else {
     MachineInstr *CopyMI = NULL;
     if (MI->isCopyLike())
       CopyMI = MI;
     handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
                               getOrCreateInterval(MO.getReg()), CopyMI);
-    // Def of a register also defines its sub-registers.
-    for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
-      // If MI also modifies the sub-register explicitly, avoid processing it
-      // more than once. Do not pass in TRI here so it checks for exact match.
-      if (!MI->definesRegister(*AS))
-        handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
-                                  getOrCreateInterval(*AS), 0);
   }
 }
 
@@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
       end = MIIdx.getStoreIndex();
     } else {
       DEBUG(dbgs() << " live through");
-      end = baseIndex;
+      end = getMBBEndIdx(MBB);
     }
   }
 
@@ -746,7 +739,8 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
 /// shrinkToUses - After removing some uses of a register, shrink its live
 /// range to just the remaining uses. This method does not compute reaching
 /// defs for new uses, and it doesn't remove dead defs.
-void LiveIntervals::shrinkToUses(LiveInterval *li) {
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+                                 SmallVectorImpl<MachineInstr*> *dead) {
   DEBUG(dbgs() << "Shrink: " << *li << '\n');
   assert(TargetRegisterInfo::isVirtualRegister(li->reg)
          && "Can't only shrink physical registers");
@@ -760,7 +754,15 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) {
       continue;
     SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
     VNInfo *VNI = li->getVNInfoAt(Idx);
-    assert(VNI && "Live interval not live into reading instruction");
+    if (!VNI) {
+      // This shouldn't happen: readsVirtualRegister returns true, but there is
+      // no live value. It is likely caused by a target getting <undef> flags
+      // wrong.
+      DEBUG(dbgs() << Idx << '\t' << *UseMI
+                   << "Warning: Instr claims to read non-existent value in "
+                    << *li << '\n');
+      continue;
+    }
     if (VNI->def == Idx) {
       // Special case: An early-clobber tied operand reads and writes the
       // register one slot early.
@@ -778,49 +780,47 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) {
     VNInfo *VNI = *I;
     if (VNI->isUnused())
       continue;
+    // We may eliminate PHI values, so recompute PHIKill flags.
+    VNI->setHasPHIKill(false);
     NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+
+    // A use tied to an early-clobber def ends at the load slot and isn't caught
+    // above. Catch it here instead. This probably only ever happens for inline
+    // assembly.
+    if (VNI->def.isUse())
+      if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
+        WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
   }
 
+  // Keep track of the PHIs that are in use.
+  SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
   // Extend intervals to reach all uses in WorkList.
   while (!WorkList.empty()) {
     SlotIndex Idx = WorkList.back().first;
     VNInfo *VNI = WorkList.back().second;
     WorkList.pop_back();
-
-    // Extend the live range for VNI to be live at Idx.
-    LiveInterval::iterator I = NewLI.find(Idx);
-
-    // Already got it?
-    if (I != NewLI.end() && I->start <= Idx) {
-      assert(I->valno == VNI && "Unexpected existing value number");
-      continue;
-    }
-
-    // Is there already a live range in the block containing Idx?
     const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
     SlotIndex BlockStart = getMBBStartIdx(MBB);
-    DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx
-                 << " in BB#" << MBB->getNumber() << '@' << BlockStart);
-    if (I != NewLI.begin() && (--I)->end > BlockStart) {
-      assert(I->valno == VNI && "Wrong reaching def");
-      DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n");
-      // Is this the first use of a PHIDef in its defining block?
-      if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) {
-        // The PHI is live, make sure the predecessors are live-out.
-        for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
-             PE = MBB->pred_end(); PI != PE; ++PI) {
-          SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
-          VNInfo *PVNI = li->getVNInfoAt(Stop);
-          // A predecessor is not required to have a live-out value for a PHI.
-          if (PVNI) {
-            assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
-            WorkList.push_back(std::make_pair(Stop, PVNI));
-          }
+
+    // Extend the live range for VNI to be live at Idx.
+    if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+      (void)ExtVNI;
+      assert(ExtVNI == VNI && "Unexpected existing value number");
+      // Is this a PHIDef we haven't seen before?
+      if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+        continue;
+      // The PHI is live, make sure the predecessors are live-out.
+      for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+           PE = MBB->pred_end(); PI != PE; ++PI) {
+        SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+        VNInfo *PVNI = li->getVNInfoAt(Stop);
+        // A predecessor is not required to have a live-out value for a PHI.
+        if (PVNI) {
+          PVNI->setHasPHIKill(true);
+          WorkList.push_back(std::make_pair(Stop, PVNI));
         }
       }
-
-      // Extend the live range in the block to include Idx.
-      NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI));
       continue;
     }
 
@@ -838,6 +838,7 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) {
   }
 
   // Handle dead values.
+  bool CanSeparate = false;
   for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
        I != E; ++I) {
     VNInfo *VNI = *I;
@@ -847,21 +848,28 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) {
     assert(LII != NewLI.end() && "Missing live range for PHI");
     if (LII->end != VNI->def.getNextSlot())
       continue;
-    if (!VNI->isPHIDef()) {
+    if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
       VNI->setIsUnused(true);
       NewLI.removeRange(*LII);
+      DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+      CanSeparate = true;
     } else {
       // This is a dead def. Make sure the instruction knows.
       MachineInstr *MI = getInstructionFromIndex(VNI->def);
       assert(MI && "No instruction defining live value");
       MI->addRegisterDead(li->reg, tri_);
+      if (dead && MI->allDefsAreDead()) {
+        DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+        dead->push_back(MI);
+      }
     }
   }
 
   // Move the trimmed ranges back.
   li->ranges.swap(NewLI.ranges);
-  DEBUG(dbgs() << "Shrink: " << *li << '\n');
+  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+  return CanSeparate;
 }
 
 
@@ -955,7 +963,7 @@ bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
 bool
 LiveIntervals::isReMaterializable(const LiveInterval &li,
                                   const VNInfo *ValNo, MachineInstr *MI,
-                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  const SmallVectorImpl<LiveInterval*> *SpillIs,
                                   bool &isLoad) {
   if (DisableReMat)
     return false;
@@ -982,9 +990,10 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
 
     // If a register operand of the re-materialized instruction is going to
     // be spilled next, then it's not legal to re-materialize this instruction.
-    for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
-      if (ImpUse == SpillIs[i]->reg)
-        return false;
+    if (SpillIs)
+      for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
+        if (ImpUse == (*SpillIs)[i]->reg)
+          return false;
   }
   return true;
 }
@@ -993,16 +1002,15 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
 /// val# of the specified interval is re-materializable.
 bool LiveIntervals::isReMaterializable(const LiveInterval &li,
                                        const VNInfo *ValNo, MachineInstr *MI) {
-  SmallVector<LiveInterval*, 4> Dummy1;
   bool Dummy2;
-  return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+  return isReMaterializable(li, ValNo, MI, 0, Dummy2);
 }
 
 /// isReMaterializable - Returns true if every definition of MI of every
 /// val# of the specified interval is re-materializable.
 bool
 LiveIntervals::isReMaterializable(const LiveInterval &li,
-                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  const SmallVectorImpl<LiveInterval*> *SpillIs,
                                   bool &isLoad) {
   isLoad = false;
   for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
@@ -1499,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
         // ...
         // def = ...
         //     = use
-        // It's better to start a new interval to avoid artifically
+        // It's better to start a new interval to avoid artificially
         // extend the new interval.
         if (MI->readsWritesVirtualRegister(li.reg) ==
             std::make_pair(false,true)) {
@@ -1702,7 +1710,9 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
   // overflow a float. This expression behaves like 10^d for small d, but is
   // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
   // headroom before overflow.
-  float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+  // By the way, powf() might be unavailable here. For consistency,
+  // We may take pow(double,double).
+  float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
 
   return (isDef + isUse) * lc;
 }
@@ -1715,7 +1725,7 @@ static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
 
 std::vector<LiveInterval*> LiveIntervals::
 addIntervalsForSpills(const LiveInterval &li,
-                      const SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const SmallVectorImpl<LiveInterval*> *SpillIs,
                       const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
   assert(li.isSpillable() && "attempt to spill already spilled interval!");
 
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 205f28a..b67f966 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
   LiveInterval::iterator RegEnd = VirtReg.end();
   SegmentIter SegPos = Segments.find(RegPos->start);
 
-  for (;;) {
+  while (SegPos.valid()) {
     SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
     if (++RegPos == RegEnd)
       return;
     SegPos.advanceTo(RegPos->start);
   }
+
+  // We have reached the end of Segments, so it is no longer necessary to search
+  // for the insertion position.
+  // It is faster to insert the end first.
+  --RegEnd;
+  SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+  for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+    SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
 }
 
 // Remove a live virtual register's segments from this union.
@@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() {
     return FirstInterference;
   CheckedFirstInterference = true;
   InterferenceResult &IR = FirstInterference;
+  IR.LiveUnionI.setMap(LiveUnion->getMap());
 
   // Quickly skip interference check for empty sets.
   if (VirtReg->empty() || LiveUnion->empty()) {
@@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() {
     // VirtReg starts first, perform double binary search.
     IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
     if (IR.VirtRegI != VirtReg->end())
-      IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start);
+      IR.LiveUnionI.find(IR.VirtRegI->start);
   } else {
     // LiveUnion starts first, perform double binary search.
-    IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex());
+    IR.LiveUnionI.find(VirtReg->beginIndex());
     if (IR.LiveUnionI.valid())
       IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
     else
@@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
 //
 // For comments on how to speed it up, see Query::findIntersection().
 unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs) {
+collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
   InterferenceResult IR = firstInterference();
   LiveInterval::iterator VirtRegEnd = VirtReg->end();
   LiveInterval *RecentInterferingVReg = NULL;
@@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
       // Cache the most recent interfering vreg to bypass isSeenInterference.
       RecentInterferingVReg = IR.LiveUnionI.value();
       ++IR.LiveUnionI;
+
+      // Stop collecting when the max weight is exceeded.
+      if (RecentInterferingVReg->weight >= MaxWeight)
+        return InterferingVRegs.size();
+
       continue;
     }
     // VirtRegI may have advanced far beyond LiveUnionI,
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index 6f9c5f4..c83578e 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -95,6 +95,9 @@ public:
   // Remove a live virtual register's segments from this union.
   void extract(LiveInterval &VirtReg);
 
+  // Remove all inserted virtual registers.
+  void clear() { Segments.clear(); ++Tag; }
+
   // Print union, using TRI to translate register names
   void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
 
@@ -163,10 +166,10 @@ public:
     bool CheckedFirstInterference;
     bool SeenAllInterferences;
     bool SeenUnspillableVReg;
-    unsigned Tag;
+    unsigned Tag, UserTag;
 
   public:
-    Query(): LiveUnion(), VirtReg() {}
+    Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
 
     Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
       LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
@@ -181,11 +184,13 @@ public:
       SeenAllInterferences = false;
       SeenUnspillableVReg = false;
       Tag = 0;
+      UserTag = 0;
     }
 
-    void init(LiveInterval *VReg, LiveIntervalUnion *LIU) {
+    void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
       assert(VReg && LIU && "Invalid arguments");
-      if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) {
+      if (UserTag == UTag && VirtReg == VReg &&
+          LiveUnion == LIU && !LIU->changedSince(Tag)) {
         // Retain cached results, e.g. firstInterference.
         return;
       }
@@ -193,6 +198,7 @@ public:
       LiveUnion = LIU;
       VirtReg = VReg;
       Tag = LIU->getTag();
+      UserTag = UTag;
     }
 
     LiveInterval &virtReg() const {
@@ -223,7 +229,8 @@ public:
 
     // Count the virtual registers in this union that interfere with this
     // query's live virtual register, up to maxInterferingRegs.
-    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX,
+                                     float MaxWeight = HUGE_VALF);
 
     // Was this virtual register visited during collectInterferingVRegs?
     bool isSeenInterference(LiveInterval *VReg) const;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 3bbda1c..f8a3dbb 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -11,24 +11,41 @@
 // is spilled or split.
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "regalloc"
 #include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri,
-                                    LiveIntervals &lis,
-                                    VirtRegMap &vrm) {
-  const TargetRegisterClass *RC = mri.getRegClass(getReg());
-  unsigned VReg = mri.createVirtualRegister(RC);
-  vrm.grow();
-  vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg()));
-  LiveInterval &li = lis.getOrCreateInterval(VReg);
-  newRegs_.push_back(&li);
-  return li;
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
+                                        LiveIntervals &LIS,
+                                        VirtRegMap &VRM) {
+  MachineRegisterInfo &MRI = VRM.getRegInfo();
+  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  VRM.grow();
+  VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+  LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+  newRegs_.push_back(&LI);
+  return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+                                          const MachineInstr *DefMI,
+                                          const TargetInstrInfo &tii,
+                                          AliasAnalysis *aa) {
+  assert(DefMI && "Missing instruction");
+  scannedRemattable_ = true;
+  if (!tii.isTriviallyReMaterializable(DefMI, aa))
+    return false;
+  remattable_.insert(VNI);
+  return true;
 }
 
 void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
@@ -42,8 +59,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
     MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
     if (!DefMI)
       continue;
-    if (tii.isTriviallyReMaterializable(DefMI, aa))
-      remattable_.insert(VNI);
+    checkRematerializable(VNI, DefMI, tii, aa);
   }
   scannedRemattable_ = true;
 }
@@ -66,18 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
   UseIdx = UseIdx.getUseIndex();
   for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = OrigMI->getOperand(i);
-    if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg())
+    if (!MO.isReg() || !MO.getReg() || MO.isDef())
       continue;
     // Reserved registers are OK.
     if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
       continue;
-    // We don't want to move any defs.
-    if (MO.isDef())
-      return false;
     // We cannot depend on virtual registers in uselessRegs_.
-    for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui)
-      if (uselessRegs_[ui]->reg == MO.getReg())
-        return false;
+    if (uselessRegs_)
+      for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
+        if ((*uselessRegs_)[ui]->reg == MO.getReg())
+          return false;
 
     LiveInterval &li = lis.getInterval(MO.getReg());
     const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
@@ -99,16 +113,22 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
   if (!remattable_.count(RM.ParentVNI))
     return false;
 
-  // No defining instruction.
-  RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def);
-  assert(RM.OrigMI && "Defining instruction for remattable value disappeared");
+  // No defining instruction provided.
+  SlotIndex DefIdx;
+  if (RM.OrigMI)
+    DefIdx = lis.getInstructionIndex(RM.OrigMI);
+  else {
+    DefIdx = RM.ParentVNI->def;
+    RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+    assert(RM.OrigMI && "No defining instruction for remattable value");
+  }
 
   // If only cheap remats were requested, bail out early.
   if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
     return false;
 
   // Verify that all used registers are available with the same values.
-  if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis))
+  if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
     return false;
 
   return true;
@@ -120,10 +140,174 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
                                          const Remat &RM,
                                          LiveIntervals &lis,
                                          const TargetInstrInfo &tii,
-                                         const TargetRegisterInfo &tri) {
+                                         const TargetRegisterInfo &tri,
+                                         bool Late) {
   assert(RM.OrigMI && "Invalid remat");
   tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
   rematted_.insert(RM.ParentVNI);
-  return lis.InsertMachineInstrInMaps(--MI).getDefIndex();
+  return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+           .getDefIndex();
+}
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+  if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+    LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+                               SmallVectorImpl<MachineInstr*> &Dead,
+                               MachineRegisterInfo &MRI,
+                               LiveIntervals &LIS,
+                               const TargetInstrInfo &TII) {
+  MachineInstr *DefMI = 0, *UseMI = 0;
+
+  // Check that there is a single def and a single use.
+  for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
+       E = MRI.reg_nodbg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MO.isDef()) {
+      if (DefMI && DefMI != MI)
+        return false;
+      if (!MI->getDesc().canFoldAsLoad())
+        return false;
+      DefMI = MI;
+    } else if (!MO.isUndef()) {
+      if (UseMI && UseMI != MI)
+        return false;
+      // FIXME: Targets don't know how to fold subreg uses.
+      if (MO.getSubReg())
+        return false;
+      UseMI = MI;
+    }
+  }
+  if (!DefMI || !UseMI)
+    return false;
+
+  DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+               << "       into single use: " << *UseMI);
+
+  SmallVector<unsigned, 8> Ops;
+  if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+    return false;
+
+  MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+  if (!FoldMI)
+    return false;
+  DEBUG(dbgs() << "                folded: " << *FoldMI);
+  LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+  UseMI->eraseFromParent();
+  DefMI->addRegisterDead(LI->reg, 0);
+  Dead.push_back(DefMI);
+  return true;
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+                                      LiveIntervals &LIS, VirtRegMap &VRM,
+                                      const TargetInstrInfo &TII) {
+  SetVector<LiveInterval*,
+            SmallVector<LiveInterval*, 8>,
+            SmallPtrSet<LiveInterval*, 8> > ToShrink;
+  MachineRegisterInfo &MRI = VRM.getRegInfo();
+
+  for (;;) {
+    // Erase all dead defs.
+    while (!Dead.empty()) {
+      MachineInstr *MI = Dead.pop_back_val();
+      assert(MI->allDefsAreDead() && "Def isn't really dead");
+      SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+
+      // Never delete inline asm.
+      if (MI->isInlineAsm()) {
+        DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+        continue;
+      }
+
+      // Use the same criteria as DeadMachineInstructionElim.
+      bool SawStore = false;
+      if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+        DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+        continue;
+      }
+
+      DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+      // Check for live intervals that may shrink
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+        if (!MOI->isReg())
+          continue;
+        unsigned Reg = MOI->getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          continue;
+        LiveInterval &LI = LIS.getInterval(Reg);
+
+        // Shrink read registers, unless it is likely to be expensive and
+        // unlikely to change anything. We typically don't want to shrink the
+        // PIC base register that has lots of uses everywhere.
+        // Always shrink COPY uses that probably come from live range splitting.
+        if (MI->readsVirtualRegister(Reg) &&
+            (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+             LI.killedAt(Idx)))
+          ToShrink.insert(&LI);
+
+        // Remove defined value.
+        if (MOI->isDef()) {
+          if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+            if (delegate_)
+              delegate_->LRE_WillShrinkVirtReg(LI.reg);
+            LI.removeValNo(VNI);
+            if (LI.empty()) {
+              ToShrink.remove(&LI);
+              eraseVirtReg(Reg, LIS);
+            }
+          }
+        }
+      }
+
+      if (delegate_)
+        delegate_->LRE_WillEraseInstruction(MI);
+      LIS.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+    }
+
+    if (ToShrink.empty())
+      break;
+
+    // Shrink just one live interval. Then delete new dead defs.
+    LiveInterval *LI = ToShrink.back();
+    ToShrink.pop_back();
+    if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+      continue;
+    if (delegate_)
+      delegate_->LRE_WillShrinkVirtReg(LI->reg);
+    if (!LIS.shrinkToUses(LI, &Dead))
+      continue;
+
+    // LI may have been separated, create new intervals.
+    LI->RenumberValues(LIS);
+    ConnectedVNInfoEqClasses ConEQ(LIS);
+    unsigned NumComp = ConEQ.Classify(LI);
+    if (NumComp <= 1)
+      continue;
+    DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
+    SmallVector<LiveInterval*, 8> Dups(1, LI);
+    for (unsigned i = 1; i != NumComp; ++i) {
+      Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+      if (delegate_)
+        delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+    }
+    ConEQ.Distribute(&Dups[0], MRI);
+  }
 }
 
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+                                             LiveIntervals &LIS,
+                                             const MachineLoopInfo &Loops) {
+  VirtRegAuxInfo VRAI(MF, LIS, Loops);
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    LiveInterval &LI = **I;
+    VRAI.CalculateRegClass(LI.reg);
+    VRAI.CalculateWeightAndHint(LI);
+  }
+}
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
index 73f69ed..14d227e 100644
--- a/lib/CodeGen/LiveRangeEdit.h
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -25,13 +25,36 @@ namespace llvm {
 
 class AliasAnalysis;
 class LiveIntervals;
+class MachineLoopInfo;
 class MachineRegisterInfo;
 class VirtRegMap;
 
 class LiveRangeEdit {
+public:
+  /// Callback methods for LiveRangeEdit owners.
+  struct Delegate {
+    /// Called immediately before erasing a dead machine instruction.
+    virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
+
+    /// Called when a virtual register is no longer used. Return false to defer
+    /// its deletion from LiveIntervals.
+    virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+
+    /// Called before shrinking the live range of a virtual register.
+    virtual void LRE_WillShrinkVirtReg(unsigned) {}
+
+    /// Called after cloning a virtual register.
+    /// This is used for new registers representing connected components of Old.
+    virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+
+    virtual ~Delegate() {}
+  };
+
+private:
   LiveInterval &parent_;
   SmallVectorImpl<LiveInterval*> &newRegs_;
-  const SmallVectorImpl<LiveInterval*> &uselessRegs_;
+  Delegate *const delegate_;
+  const SmallVectorImpl<LiveInterval*> *uselessRegs_;
 
   /// firstNew_ - Index of the first register added to newRegs_.
   const unsigned firstNew_;
@@ -41,11 +64,11 @@ class LiveRangeEdit {
 
   /// remattable_ - Values defined by remattable instructions as identified by
   /// tii.isTriviallyReMaterializable().
-  SmallPtrSet<VNInfo*,4> remattable_;
+  SmallPtrSet<const VNInfo*,4> remattable_;
 
   /// rematted_ - Values that were actually rematted, and so need to have their
   /// live range trimmed or entirely removed.
-  SmallPtrSet<VNInfo*,4> rematted_;
+  SmallPtrSet<const VNInfo*,4> rematted_;
 
   /// scanRemattable - Identify the parent_ values that may rematerialize.
   void scanRemattable(LiveIntervals &lis,
@@ -57,6 +80,11 @@ class LiveRangeEdit {
   bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
                           SlotIndex UseIdx, LiveIntervals &lis);
 
+  /// foldAsLoad - If LI has a single use and a single def that can be folded as
+  /// a load, eliminate the register by folding the def into the use.
+  bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
+                  MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
+
 public:
   /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
   /// @param parent The register being spilled or split.
@@ -66,9 +94,13 @@ public:
   ///        rematerializing values because they are about to be removed.
   LiveRangeEdit(LiveInterval &parent,
                 SmallVectorImpl<LiveInterval*> &newRegs,
-                const SmallVectorImpl<LiveInterval*> &uselessRegs)
-    : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
-      firstNew_(newRegs.size()), scannedRemattable_(false) {}
+                Delegate *delegate = 0,
+                const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
+    : parent_(parent), newRegs_(newRegs),
+      delegate_(delegate),
+      uselessRegs_(uselessRegs),
+      firstNew_(newRegs.size()),
+      scannedRemattable_(false) {}
 
   LiveInterval &getParent() const { return parent_; }
   unsigned getReg() const { return parent_.reg; }
@@ -81,16 +113,33 @@ public:
   bool empty() const { return size() == 0; }
   LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
 
-  /// create - Create a new register with the same class and stack slot as
+  /// FIXME: Temporary accessors until we can get rid of
+  /// LiveIntervals::AddIntervalsForSpills
+  SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
+  const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
+    return uselessRegs_;
+  }
+
+  /// createFrom - Create a new virtual register based on OldReg.
+  LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
+
+  /// create - Create a new register with the same class and original slot as
   /// parent.
-  LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+  LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
+    return createFrom(getReg(), LIS, VRM);
+  }
 
   /// anyRematerializable - Return true if any parent values may be
   /// rematerializable.
-  /// This function must be called before ny rematerialization is attempted.
+  /// This function must be called before any rematerialization is attempted.
   bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
                            AliasAnalysis*);
 
+  /// checkRematerializable - Manually add VNI to the list of rematerializable
+  /// values if DefMI may be rematerializable.
+  bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
+                             const TargetInstrInfo&, AliasAnalysis*);
+
   /// Remat - Information needed to rematerialize at a specific location.
   struct Remat {
     VNInfo *ParentVNI;      // parent_'s value at the remat location.
@@ -116,18 +165,35 @@ public:
                             const Remat &RM,
                             LiveIntervals&,
                             const TargetInstrInfo&,
-                            const TargetRegisterInfo&);
+                            const TargetRegisterInfo&,
+                            bool Late = false);
 
   /// markRematerialized - explicitly mark a value as rematerialized after doing
   /// it manually.
-  void markRematerialized(VNInfo *ParentVNI) {
+  void markRematerialized(const VNInfo *ParentVNI) {
     rematted_.insert(ParentVNI);
   }
 
   /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
-  bool didRematerialize(VNInfo *ParentVNI) const {
+  bool didRematerialize(const VNInfo *ParentVNI) const {
     return rematted_.count(ParentVNI);
   }
+
+  /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
+  /// to erase it from LIS.
+  void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
+
+  /// eliminateDeadDefs - Try to delete machine instructions that are now dead
+  /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
+  /// and further dead efs to be eliminated.
+  void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+                         LiveIntervals&, VirtRegMap&,
+                         const TargetInstrInfo&);
+
+  /// calculateRegClassAndHint - Recompute register class and hint for each new
+  /// register.
+  void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
+                                const MachineLoopInfo&);
 };
 
 }
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index dd43ef2..20bad60 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -107,9 +107,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
   // Mark the variable known alive in this bb
   VRInfo.AliveBlocks.set(BBNum);
 
-  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
-         E = MBB->pred_rend(); PI != E; ++PI)
-    WorkList.push_back(*PI);
+  WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
 }
 
 void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
@@ -707,7 +705,7 @@ bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
 
   // Loop over all of the successors of the basic block, checking to see if
   // the value is either live in the block, or if it is killed in the block.
-  std::vector<MachineBasicBlock*> OpSuccBlocks;
+  SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
   for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
          E = MBB.succ_end(); SI != E; ++SI) {
     MachineBasicBlock *SuccMBB = *SI;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index ccbff0a..57f3e34 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -363,8 +363,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
 }
 
 void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
-  std::vector<MachineBasicBlock *>::iterator I =
-    std::find(Predecessors.begin(), Predecessors.end(), pred);
+  pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
   assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
   Predecessors.erase(I);
 }
@@ -402,8 +401,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
 }
 
 bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
-  std::vector<MachineBasicBlock *>::const_iterator I =
-    std::find(Successors.begin(), Successors.end(), MBB);
+  const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
   return I != Successors.end();
 }
 
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 07a7d27..f97ccf6 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     if (!FoundCSE) {
       // Look for trivial copy coalescing opportunities.
       if (PerformTrivialCoalescing(MI, MBB)) {
+        Changed = true;
+
         // After coalescing MI itself may become a copy.
         if (MI->isCopyLike())
           continue;
@@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       if (NewMI) {
         Commuted = true;
         FoundCSE = VNT.count(NewMI);
-        if (NewMI != MI)
+        if (NewMI != MI) {
           // New instruction. It doesn't need to be kept.
           NewMI->eraseFromParent();
-        else if (!FoundCSE)
+          Changed = true;
+        } else if (!FoundCSE)
           // MI was changed but it didn't help, commute it back!
           (void)TII->commuteInstruction(MI);
       }
@@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
         ++NumPhysCSEs;
       if (Commuted)
         ++NumCommutes;
+      Changed = true;
     } else {
       DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
       VNT.insert(MI, CurrVN++);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index aa9ea61..71df6f8 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -441,6 +441,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
     OS << ")";
   }
 
+  // Print nontemporal info.
+  if (MMO.isNonTemporal())
+    OS << "(nontemporal)";
+
   return OS;
 }
 
@@ -451,7 +455,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
 /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
 /// TID NULL and no operands.
 MachineInstr::MachineInstr()
-  : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
+  : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0),
     Parent(0) {
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -470,7 +475,7 @@ void MachineInstr::addImplicitDefUseOperands() {
 /// implicit operands. It reserves space for the number of operands specified by
 /// the TargetInstrDesc.
 MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
-  : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   if (!NoImp)
     NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
@@ -484,8 +489,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
 /// MachineInstr ctor - As above, but with a DebugLoc.
 MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
                            bool NoImp)
-  : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
-    Parent(0), debugLoc(dl) {
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   if (!NoImp)
     NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
@@ -499,7 +504,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
 /// that the MachineInstr is created and added to the end of the specified 
 /// basic block.
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
   NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
@@ -514,8 +519,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
 ///
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
                            const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
-    Parent(0), debugLoc(dl) {
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
   NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
   Operands.reserve(NumImplicitOps + TID->getNumOperands());
@@ -528,7 +533,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
 /// MachineInstr ctor - Copies MachineInstr arg exactly
 ///
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
-  : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
+  : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
     Parent(0), debugLoc(MI.getDebugLoc()) {
   Operands.reserve(MI.getNumOperands());
@@ -538,6 +543,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
     addOperand(MI.getOperand(i));
   NumImplicitOps = MI.NumImplicitOps;
 
+  // Copy all the flags.
+  Flags = MI.Flags;
+
   // Set parent to null.
   Parent = 0;
 
@@ -1417,6 +1425,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   }
 
   bool HaveSemi = false;
+  if (Flags) {
+    if (!HaveSemi) OS << ";"; HaveSemi = true;
+    OS << " flags: ";
+
+    if (Flags & FrameSetup)
+      OS << "FrameSetup";
+  }
+
   if (!memoperands_empty()) {
     if (!HaveSemi) OS << ";"; HaveSemi = true;
 
@@ -1447,13 +1463,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     }
   }
 
+  // Print debug location information.
   if (!debugLoc.isUnknown() && MF) {
-    if (!HaveSemi) OS << ";";
+    if (!HaveSemi) OS << ";"; HaveSemi = true;
     OS << " dbg:";
     printDebugLoc(debugLoc, MF, OS);
   }
 
-  OS << "\n";
+  OS << '\n';
 }
 
 bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
@@ -1530,13 +1547,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
       continue;
 
     if (Reg == IncomingReg) {
-      if (!Found) {
-        if (MO.isDead())
-          // The register is already marked dead.
-          return true;
-        MO.setIsDead();
-        Found = true;
-      }
+      MO.setIsDead();
+      Found = true;
     } else if (hasAliases && MO.isDead() &&
                TargetRegisterInfo::isPhysicalRegister(Reg)) {
       // There exists a super-register that's marked dead.
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 443fc2d..b315702 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -39,7 +39,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 using namespace llvm;
 
 STATISTIC(NumHoisted,
@@ -169,6 +168,10 @@ namespace {
     /// 
     bool IsLoopInvariantInst(MachineInstr &I);
 
+    /// HasAnyPHIUse - Return true if the specified register is used by any
+    /// phi node.
+    bool HasAnyPHIUse(unsigned Reg) const;
+
     /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
     /// and an use in the current loop, return true if the target considered
     /// it 'high'.
@@ -294,7 +297,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
     RegLimit.resize(NumRC);
     for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
            E = TRI->regclass_end(); I != E; ++I)
-      RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF);
+      RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
   }
 
   // Get our Loop information...
@@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 }
 
 
-/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+/// HasAnyPHIUse - Return true if the specified register is used by any
+/// phi node.
+bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
     if (UseMI->isPHI())
       return true;
+    // Look pass copies as well.
+    if (UseMI->isCopy()) {
+      unsigned Def = UseMI->getOperand(0).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Def) &&
+          HasAnyPHIUse(Def))
+        return true;
+    }
   }
   return false;
 }
 
-
 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
 /// and an use in the current loop, return true if the target considered
 /// it 'high'.
@@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
       return false;
   }
 
-  // If result(s) of this instruction is used by PHIs, then don't hoist it.
-  // The presence of joins makes it difficult for current register allocator
-  // implementation to perform remat.
+  // If result(s) of this instruction is used by PHIs outside of the loop, then
+  // don't hoist it if the instruction because it will introduce an extra copy.
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef())
       continue;
-    if (HasPHIUses(MO.getReg(), MRI))
+    if (HasAnyPHIUse(MO.getReg()))
       return false;
   }
 
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 8a93a24..916dff7 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (MI->isDebugValue())
       continue;
 
-    if (PerformTrivialForwardCoalescing(MI, &MBB))
+    bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+    if (Joined) {
+      MadeChange = true;
       continue;
+    }
 
     if (SinkInstruction(MI, SawStore))
       ++NumSunk, MadeChange = true;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 7351119..f95f411 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -402,6 +402,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
   SmallVector<MachineOperand, 4> Cond;
   if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
                           TBB, FBB, Cond)) {
+    // If the block branches directly to a landing pad successor, pretend that
+    // the landing pad is a normal block.
+    LandingPadSuccs.erase(TBB);
+    LandingPadSuccs.erase(FBB);
+
     // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
     // check whether its answers match up with reality.
     if (!TBB && !FBB) {
@@ -602,9 +607,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     // Check Live Variables.
     if (MI->isDebugValue()) {
       // Liveness checks are not valid for debug values.
-    } else if (MO->isUndef()) {
-      // An <undef> doesn't refer to any register, so just skip it.
-    } else if (MO->isUse()) {
+    } else if (MO->isUse() && !MO->isUndef()) {
       regsLiveInButUnused.erase(Reg);
 
       bool isKill = false;
@@ -612,13 +615,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
         // A two-addr use counts as a kill if use and def are the same.
         unsigned DefReg = MI->getOperand(defIdx).getReg();
-        if (Reg == DefReg) {
+        if (Reg == DefReg)
           isKill = true;
-          // And in that case an explicit kill flag is not allowed.
-          if (MO->isKill())
-            report("Illegal kill flag on two-address instruction operand",
-                   MO, MONum);
-        } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
           report("Two-address instruction operands must be identical",
                  MO, MONum);
         }
@@ -675,8 +674,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
             MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
         }
       }
-    } else {
-      assert(MO->isDef());
+    } else if (MO->isDef()) {
       // Register defined.
       // TODO: verify that earlyclobber ops are not used.
       if (MO->isDead())
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 5f7cf58..af65f13 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -28,12 +28,17 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include <algorithm>
-#include <map>
 using namespace llvm;
 
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+                     cl::Hidden, cl::desc("Disable critical edge splitting "
+                                          "during PHI elimination"));
+
 namespace {
   class PHIElimination : public MachineFunctionPass {
     MachineRegisterInfo *MRI; // Machine register information
@@ -105,10 +110,12 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
   // Split critical edges to help the coalescer
-  if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
-    MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
-    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-      Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+  if (!DisableEdgeSplitting) {
+    if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+      MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+        Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+    }
   }
 
   // Populate VRegPHIUseCount
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 3489db2..315aedd 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
     RegisterRegAlloc::setDefault(RegAlloc);
   }
 
+  // This forces linking of the linear scan register allocator,
+  // so -regalloc=linearscan still works in clang.
+  if (Ctor == createLinearScanRegisterAllocator)
+    return createLinearScanRegisterAllocator();
+
   if (Ctor != createDefaultRegisterAllocator)
     return Ctor();
 
@@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
   case CodeGenOpt::None:
     return createFastRegisterAllocator();
   default:
-    return createLinearScanRegisterAllocator();
+    return createGreedyRegisterAllocator();
   }
 }
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 5d7123c..c105bb0 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -30,6 +30,15 @@
 //     If the "sub" instruction all ready sets (or could be modified to set) the
 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
 //     eliminate the "cmp" instruction.
+//
+// - Optimize Bitcast pairs:
+//
+//     v1 = bitcast v0
+//     v2 = bitcast v1
+//        = v2
+//   =>
+//     v1 = bitcast v0
+//        = v0
 // 
 //===----------------------------------------------------------------------===//
 
@@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                 cl::desc("Disable the peephole optimizer"));
 
 STATISTIC(NumReuse,      "Number of extension results reused");
-STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
+STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate foled");
 
 namespace {
@@ -85,6 +95,7 @@ namespace {
     }
 
   private:
+    bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
@@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
   return Changed;
 }
 
+/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
+/// a value cross register classes), and the source is defined by another
+/// bitcast instruction B. And if the register class of source of B matches
+/// the register class of instruction A, then it is legal to replace all uses
+/// of the def of A with source of B. e.g.
+///   %vreg0<def> = VMOVSR %vreg1
+///   %vreg3<def> = VMOVRS %vreg0
+///   Replace all uses of vreg3 with vreg1.
+
+bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+                                             MachineBasicBlock *MBB) {
+  unsigned NumDefs = MI->getDesc().getNumDefs();
+  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+  if (NumDefs != 1)
+    return false;
+
+  unsigned Def = 0;
+  unsigned Src = 0;
+  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef())
+      Def = Reg;
+    else if (Src)
+      // Multiple sources?
+      return false;
+    else
+      Src = Reg;
+  }
+
+  assert(Def && Src && "Malformed bitcast instruction!");
+
+  MachineInstr *DefMI = MRI->getVRegDef(Src);
+  if (!DefMI || !DefMI->getDesc().isBitcast())
+    return false;
+
+  unsigned SrcDef = 0;
+  unsigned SrcSrc = 0;
+  NumDefs = DefMI->getDesc().getNumDefs();
+  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+  if (NumDefs != 1)
+    return false;
+  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+    const MachineOperand &MO = DefMI->getOperand(i);
+    if (!MO.isReg() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef())
+      SrcDef = Reg;
+    else if (SrcSrc)
+      // Multiple sources?
+      return false;
+    else
+      SrcSrc = Reg;
+  }
+
+  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+    return false;
+
+  MRI->replaceRegWith(Def, SrcSrc);
+  MRI->clearKillFlags(SrcSrc);
+  MI->eraseFromParent();
+  ++NumBitcasts;
+  return true;
+}
+
 /// OptimizeCmpInstr - If the instruction is a compare and the previous
 /// instruction it's comparing against all ready sets (or could be modified to
 /// set) the same flag as the compare, then we can remove the comparison and use
 /// the flag from the previous instruction.
 bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
-                                         MachineBasicBlock *MBB){
+                                         MachineBasicBlock *MBB) {
   // If this instruction is a comparison against zero and isn't comparing a
   // physical register, we can try to optimize it.
   unsigned SrcReg;
@@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
 
   // Attempt to optimize the comparison instruction.
   if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
-    ++NumEliminated;
+    ++NumCmps;
     return true;
   }
 
@@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (MI->getDesc().isCompare()) {
+      const TargetInstrDesc &TID = MI->getDesc();
+
+      if (TID.isBitcast()) {
+        if (OptimizeBitcastInstr(MI, MBB)) {
+          // MI is deleted.
+          Changed = true;
+          MII = First ? I->begin() : llvm::next(PMII);
+          continue;
+        }        
+      } else if (TID.isCompare()) {
         if (OptimizeCmpInstr(MI, MBB)) {
           // MI is deleted.
           Changed = true;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 9cd9941..c04d656 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -47,7 +47,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
 bool
 ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
                                             unsigned Reg, unsigned OpIdx,
-                                            const TargetInstrInfo *tii_,
                                             SmallSet<unsigned, 8> &ImpDefRegs) {
   switch(OpIdx) {
   case 1:
@@ -61,7 +60,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
 }
 
 static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
-                        const TargetInstrInfo *tii_,
                         SmallSet<unsigned, 8> &ImpDefRegs) {
   if (MI->isCopy()) {
     MachineOperand &MO0 = MI->getOperand(0);
@@ -86,11 +84,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
 
   bool Changed = false;
 
-  const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo();
-  const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo();
-  MachineRegisterInfo *mri_ = &fn.getRegInfo();
-
-  LiveVariables *lv_ = &getAnalysis<LiveVariables>();
+  TII = fn.getTarget().getInstrInfo();
+  TRI = fn.getTarget().getRegisterInfo();
+  MRI = &fn.getRegInfo();
+  LV = &getAnalysis<LiveVariables>();
 
   SmallSet<unsigned, 8> ImpDefRegs;
   SmallVector<MachineInstr*, 8> ImpDefMIs;
@@ -113,7 +110,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         unsigned Reg = MI->getOperand(0).getReg();
         ImpDefRegs.insert(Reg);
         if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-          for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS)
+          for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
             ImpDefRegs.insert(*SS);
         }
         ImpDefMIs.push_back(MI);
@@ -125,7 +122,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         MachineOperand &MO = MI->getOperand(1);
         if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
           if (MO.isKill()) {
-            LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
+            LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
             vi.removeKill(MI);
           }
           MI->eraseFromParent();
@@ -145,14 +142,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         if (!ImpDefRegs.count(Reg))
           continue;
         // Use is a copy, just turn it into an implicit_def.
-        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
+        if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
           bool isKill = MO.isKill();
-          MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+          MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
           for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
             MI->RemoveOperand(j);
           if (isKill) {
             ImpDefRegs.erase(Reg);
-            LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
             vi.removeKill(MI);
           }
           ChangedToImpDef = true;
@@ -210,8 +207,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
       // uses.
       bool Skip = false;
       SmallVector<MachineInstr*, 4> DeadImpDefs;
-      for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
-             DE = mri_->def_end(); DI != DE; ++DI) {
+      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+             DE = MRI->def_end(); DI != DE; ++DI) {
         MachineInstr *DeadImpDef = &*DI;
         if (!DeadImpDef->isImplicitDef()) {
           Skip = true;
@@ -229,8 +226,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
       Changed = true;
 
       // Process each use instruction once.
-      for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
-             UE = mri_->use_end(); UI != UE; ++UI) {
+      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+             UE = MRI->use_end(); UI != UE; ++UI) {
         if (UI.getOperand().isUndef())
           continue;
         MachineInstr *RMI = &*UI;
@@ -242,8 +239,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         MachineInstr *RMI = RUses[i];
 
         // Turn a copy use into an implicit_def.
-        if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
-          RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
+          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
 
           bool isKill = false;
           SmallVector<unsigned, 4> Ops;
@@ -263,15 +260,15 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
 
           // Update LiveVariables varinfo if the instruction is a kill.
           if (isKill) {
-            LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
             vi.removeKill(RMI);
           }
           continue;
         }
 
         // Replace Reg with a new vreg that's marked implicit.
-        const TargetRegisterClass* RC = mri_->getRegClass(Reg);
-        unsigned NewVReg = mri_->createVirtualRegister(RC);
+        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
+        unsigned NewVReg = MRI->createVirtualRegister(RC);
         bool isKill = true;
         for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
           MachineOperand &RRMO = RMI->getOperand(j);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index ad7b6e4..f1f3c99 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
         --BeforeI;
 
       // Restore all registers immediately before the return and any
-      // terminators that preceed it.
+      // terminators that precede it.
       if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           unsigned Reg = CSI[i].getReg();
@@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
       --BeforeI;
 
     // Restore all registers immediately before the return and any
-    // terminators that preceed it.
+    // terminators that precede it.
     for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
       unsigned Reg = blockCSI[i].getReg();
       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -559,7 +559,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Make sure the special register scavenging spill slot is closest to the
   // frame pointer if a frame pointer is required.
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
-  if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+  if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
+      !RegInfo->needsStackRealignment(Fn)) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
       AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -641,7 +642,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
 
   // Make sure the special register scavenging spill slot is closest to the
   // stack pointer.
-  if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+  if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
+             !RegInfo->useFPForScavengingIndex(Fn))) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
       AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -811,7 +813,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
     // directly.
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
       MachineInstr *MI = I;
-      bool DoIncr = true;
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         if (MI->getOperand(i).isReg()) {
           MachineOperand &MO = MI->getOperand(i);
@@ -842,10 +843,8 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
 
         }
       }
-      if (DoIncr) {
-        RS->forward(I);
-        ++I;
-      }
+      RS->forward(I);
+      ++I;
     }
   }
 }
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index b655dda4..7f75f65 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -26,7 +26,7 @@ and then "merge" mul and mov:
         sxth r3, r3
         mla r4, r3, lr, r4
 
-It also increase the likelyhood the store may become dead.
+It also increase the likelihood the store may become dead.
 
 //===---------------------------------------------------------------------===//
 
@@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo.
 
 //===---------------------------------------------------------------------===//
 
-Stack coloring improvments:
+Stack coloring improvements:
 
 1. Do proper LiveStackAnalysis on all stack objects including those which are
    not spill slots.
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 5af0ce7..f431d5a 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -61,6 +61,11 @@ class LiveVirtRegQueue;
 /// assignment order.
 class RegAllocBase {
   LiveIntervalUnion::Allocator UnionAllocator;
+
+  // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
+  // registers may have changed.
+  unsigned UserTag;
+
 protected:
   // Array of LiveIntervalUnions indexed by physical register.
   class LiveUnionArray {
@@ -92,7 +97,7 @@ protected:
   // query on a new live virtual register.
   OwningArrayPtr<LiveIntervalUnion::Query> Queries;
 
-  RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {}
+  RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
 
   virtual ~RegAllocBase() {}
 
@@ -104,7 +109,7 @@ protected:
   // before querying a new live virtual register. This ties Queries and
   // PhysReg2LiveUnion together.
   LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
-    Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]);
+    Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
     return Queries[PhysReg];
   }
 
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 6923908..d92d80f 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
 #include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
 #include "RegAllocBase.h"
 #include "RenderMachineFunction.h"
 #include "Spiller.h"
@@ -136,6 +138,7 @@ char RABasic::ID = 0;
 } // end anonymous namespace
 
 RABasic::RABasic(): MachineFunctionPass(ID) {
+  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
@@ -154,6 +157,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<AliasAnalysis>();
   AU.addRequired<LiveIntervals>();
   AU.addPreserved<SlotIndexes>();
+  AU.addRequired<LiveDebugVariables>();
+  AU.addPreserved<LiveDebugVariables>();
   if (StrongPHIElim)
     AU.addRequiredID(StrongPHIEliminationID);
   AU.addRequiredTransitive<RegisterCoalescer>();
@@ -230,9 +235,12 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
   MRI = &vrm.getRegInfo();
   VRM = &vrm;
   LIS = &lis;
-  PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs());
-  // Cache an interferece query for each physical reg
-  Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+  const unsigned NumRegs = TRI->getNumRegs();
+  if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+    PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+    // Cache an interferece query for each physical reg
+    Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+  }
 }
 
 void RegAllocBase::LiveUnionArray::clear() {
@@ -246,13 +254,15 @@ void RegAllocBase::LiveUnionArray::clear() {
 }
 
 void RegAllocBase::releaseMemory() {
-  PhysReg2LiveUnion.clear();
+  for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+    PhysReg2LiveUnion[r].clear();
 }
 
 // Visit all the live registers. If they are already assigned to a physical
 // register, unify them with the corresponding LiveIntervalUnion, otherwise push
 // them on the priority queue for later assignment.
 void RegAllocBase::seedLiveRegs() {
+  NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
   for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
     unsigned RegNum = I->first;
     LiveInterval &VirtReg = *I->second;
@@ -268,6 +278,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
                << " to " << PrintReg(PhysReg, TRI) << '\n');
   assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
   VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+  MRI->setPhysRegUsed(PhysReg);
   PhysReg2LiveUnion[PhysReg].unify(VirtReg);
   ++NumAssigned;
 }
@@ -288,6 +299,18 @@ void RegAllocBase::allocatePhysRegs() {
 
   // Continue assigning vregs one at a time to available physical registers.
   while (LiveInterval *VirtReg = dequeue()) {
+    assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+    // Unused registers can appear when the spiller coalesces snippets.
+    if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+      DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+      LIS->removeInterval(VirtReg->reg);
+      continue;
+    }
+
+    // Invalidate all interference queries, live ranges could have changed.
+    ++UserTag;
+
     // selectOrSplit requests the allocator to return an available physical
     // register if possible and populate a list of new live intervals that
     // result from splitting.
@@ -304,7 +327,12 @@ void RegAllocBase::allocatePhysRegs() {
     for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
          I != E; ++I) {
       LiveInterval *SplitVirtReg = *I;
-      if (SplitVirtReg->empty()) continue;
+      assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+      if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+        DEBUG(dbgs() << "not queueing unused  " << *SplitVirtReg << '\n');
+        LIS->removeInterval(SplitVirtReg->reg);
+        continue;
+      }
       DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
       assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
              "expect split value in virtual register");
@@ -344,7 +372,8 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
     unassign(SpilledVReg, PhysReg);
 
     // Spill the extracted interval.
-    spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills);
+    LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+    spiller().spill(LRE);
   }
   // After extracting segments, the query's results are invalid. But keep the
   // contents valid until we're done accessing pendingSpills.
@@ -381,29 +410,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
 // Add newly allocated physical registers to the MBB live in sets.
 void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
   NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
-  typedef SmallVector<MachineBasicBlock*, 8> MBBVec;
-  MBBVec liveInMBBs;
-  MachineBasicBlock &entryMBB = *MF->begin();
+  SlotIndexes *Indexes = LIS->getSlotIndexes();
+  if (MF->size() <= 1)
+    return;
 
+  LiveIntervalUnion::SegmentIter SI;
   for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
     LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
     if (LiveUnion.empty())
       continue;
-    for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid();
-         ++SI) {
-
-      // Find the set of basic blocks which this range is live into...
-      liveInMBBs.clear();
-      if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue;
-
-      // And add the physreg for this interval to their live-in sets.
-      for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
-           I != E; ++I) {
-        MachineBasicBlock *MBB = *I;
-        if (MBB == &entryMBB) continue;
-        if (MBB->isLiveIn(PhysReg)) continue;
-        MBB->addLiveIn(PhysReg);
-      }
+    MachineFunction::iterator MBB = llvm::next(MF->begin());
+    MachineFunction::iterator MFE = MF->end();
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(MBB);
+    SI.setMap(LiveUnion.getMap());
+    SI.find(Start);
+    while (SI.valid()) {
+      if (SI.start() <= Start) {
+        if (!MBB->isLiveIn(PhysReg))
+          MBB->addLiveIn(PhysReg);
+      } else if (SI.start() > Stop)
+        MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+      if (++MBB == MFE)
+        break;
+      tie(Start, Stop) = Indexes->getMBBRange(MBB);
+      SI.advanceTo(Start);
     }
   }
 }
@@ -469,9 +500,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
   }
   // No other spill candidates were found, so spill the current VirtReg.
   DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
-  SmallVector<LiveInterval*, 1> pendingSpills;
-
-  spiller().spill(&VirtReg, SplitVRegs, pendingSpills);
+  LiveRangeEdit LRE(VirtReg, SplitVRegs);
+  spiller().spill(LRE);
 
   // The live virtual register requesting allocation was spilled, so tell
   // the caller not to allocate anything during this round.
@@ -490,7 +520,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
 
   ReservedRegs = TRI->getReservedRegs(*MF);
 
-  SpillerInstance.reset(createSpiller(*this, *MF, *VRM));
+  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
 
   allocatePhysRegs();
 
@@ -525,6 +555,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
   // Run rewriter
   VRM->rewrite(LIS->getSlotIndexes());
 
+  // Write out new DBG_VALUE instructions.
+  getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
   // The pass output is in VirtRegMap. Release all the transient data.
   releaseMemory();
 
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 15036e3..b2fd6e0 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -97,7 +97,7 @@ namespace {
       // immediately without checking aliases.
       regFree,
 
-      // A reserved register has been assigned expolicitly (e.g., setting up a
+      // A reserved register has been assigned explicitly (e.g., setting up a
       // call parameter), and it remains reserved until it is used.
       regReserved
 
@@ -396,7 +396,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
   PhysRegState[PhysReg] = NewState;
   for (const unsigned *AS = TRI->getAliasSet(PhysReg);
        unsigned Alias = *AS; ++AS) {
-    UsedInInstr.set(Alias);
     switch (unsigned VirtReg = PhysRegState[Alias]) {
     case regDisabled:
       break;
@@ -420,20 +419,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
 // can be allocated directly.
 // Returns spillImpossible when PhysReg or an alias can't be spilled.
 unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
-  if (UsedInInstr.test(PhysReg))
+  if (UsedInInstr.test(PhysReg)) {
+    DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n");
     return spillImpossible;
+  }
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
   case regDisabled:
     break;
   case regFree:
     return 0;
   case regReserved:
+    DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: "
+          << PhysReg << " is reserved already.\n");
     return spillImpossible;
   default:
     return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
   }
 
-  // This is a disabled register, add up const of aliases.
+  // This is a disabled register, add up cost of aliases.
+  DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n");
   unsigned Cost = 0;
   for (const unsigned *AS = TRI->getAliasSet(PhysReg);
        unsigned Alias = *AS; ++AS) {
@@ -511,9 +515,14 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
 
   unsigned BestReg = 0, BestCost = spillImpossible;
   for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
-    if (!Allocatable.test(*I))
+    if (!Allocatable.test(*I)) {
+      DEBUG(dbgs() << "\tRegister " << *I << " is not allocatable.\n");
       continue;
+    }
     unsigned Cost = calcSpillCost(*I);
+    DEBUG(dbgs() << "\tRegister: " << *I << "\n");
+    DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+    DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
     // Cost is 0 when all aliases are already disabled.
     if (Cost == 0)
       return assignVirtToPhysReg(LRE, *I);
@@ -722,9 +731,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
     unsigned Reg = MO.getReg();
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n");
     UsedInInstr.set(Reg);
-    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
-      UsedInInstr.set(*AS);
   }
 
   // Also mark PartialDefs as used to avoid reallocation.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 406485a..7c461d8 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -14,7 +14,8 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "AllocationOrder.h"
-#include "LiveIntervalUnion.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
 #include "LiveRangeEdit.h"
 #include "RegAllocBase.h"
 #include "Spiller.h"
@@ -49,14 +50,16 @@ using namespace llvm;
 
 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
-STATISTIC(NumReassigned,   "Number of interferences reassigned");
 STATISTIC(NumEvicted,      "Number of interferences evicted");
 
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
 namespace {
-class RAGreedy : public MachineFunctionPass, public RegAllocBase {
+class RAGreedy : public MachineFunctionPass,
+                 public RegAllocBase,
+                 private LiveRangeEdit::Delegate {
+
   // context
   MachineFunction *MF;
   BitVector ReservedRegs;
@@ -72,14 +75,73 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase {
 
   // state
   std::auto_ptr<Spiller> SpillerInstance;
-  std::auto_ptr<SplitAnalysis> SA;
   std::priority_queue<std::pair<unsigned, unsigned> > Queue;
-  IndexedMap<unsigned, VirtReg2IndexFunctor> Generation;
+
+  // Live ranges pass through a number of stages as we try to allocate them.
+  // Some of the stages may also create new live ranges:
+  //
+  // - Region splitting.
+  // - Per-block splitting.
+  // - Local splitting.
+  // - Spilling.
+  //
+  // Ranges produced by one of the stages skip the previous stages when they are
+  // dequeued. This improves performance because we can skip interference checks
+  // that are unlikely to give any results. It also guarantees that the live
+  // range splitting algorithm terminates, something that is otherwise hard to
+  // ensure.
+  enum LiveRangeStage {
+    RS_New,      ///< Never seen before.
+    RS_First,    ///< First time in the queue.
+    RS_Second,   ///< Second time in the queue.
+    RS_Global,   ///< Produced by global splitting.
+    RS_Local,    ///< Produced by local splitting.
+    RS_Spill     ///< Produced by spilling.
+  };
+
+  IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;
+
+  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+    return LiveRangeStage(LRStage[VirtReg.reg]);
+  }
+
+  template<typename Iterator>
+  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+    LRStage.resize(MRI->getNumVirtRegs());
+    for (;Begin != End; ++Begin) {
+      unsigned Reg = (*Begin)->reg;
+      if (LRStage[Reg] == RS_New)
+        LRStage[Reg] = NewStage;
+    }
+  }
 
   // splitting state.
+  std::auto_ptr<SplitAnalysis> SA;
+  std::auto_ptr<SplitEditor> SE;
 
-  /// All basic blocks where the current register is live.
-  SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints;
+  /// Cached per-block interference maps
+  InterferenceCache IntfCache;
+
+  /// All basic blocks where the current register has uses.
+  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+  /// Global live range splitting candidate info.
+  struct GlobalSplitCandidate {
+    unsigned PhysReg;
+    BitVector LiveBundles;
+    SmallVector<unsigned, 8> ActiveBlocks;
+
+    void reset(unsigned Reg) {
+      PhysReg = Reg;
+      LiveBundles.clear();
+      ActiveBlocks.clear();
+    }
+  };
+
+  /// Candidate info for for each PhysReg in AllocationOrder.
+  /// This vector never shrinks, but grows to the size of the largest register
+  /// class.
+  SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
   /// For every instruction in SA->UseSlots, store the previous non-copy
   /// instruction.
@@ -108,42 +170,50 @@ public:
   static char ID;
 
 private:
-  bool checkUncachedInterference(LiveInterval&, unsigned);
-  LiveInterval *getSingleInterference(LiveInterval&, unsigned);
-  bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg);
-  float calcInterferenceWeight(LiveInterval&, unsigned);
-  float calcInterferenceInfo(LiveInterval&, unsigned);
-  float calcGlobalSplitCost(const BitVector&);
-  void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+  void LRE_WillEraseInstruction(MachineInstr*);
+  bool LRE_CanEraseVirtReg(unsigned);
+  void LRE_WillShrinkVirtReg(unsigned);
+  void LRE_DidCloneVirtReg(unsigned, unsigned);
+
+  float calcSpillCost();
+  bool addSplitConstraints(InterferenceCache::Cursor, float&);
+  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
+  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+  void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                          SmallVectorImpl<LiveInterval*>&);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
   SlotIndex getPrevMappedIndex(const MachineInstr*);
   void calcPrevSlots();
   unsigned nextSplitPoint(unsigned);
-  bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&);
+  bool canEvictInterference(LiveInterval&, unsigned, float&);
 
-  unsigned tryReassign(LiveInterval&, AllocationOrder&,
-                              SmallVectorImpl<LiveInterval*>&);
+  unsigned tryAssign(LiveInterval&, AllocationOrder&,
+                     SmallVectorImpl<LiveInterval*>&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&);
+                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<LiveInterval*>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
     SmallVectorImpl<LiveInterval*>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<LiveInterval*>&);
-  unsigned trySpillInterferences(LiveInterval&, AllocationOrder&,
-                                 SmallVectorImpl<LiveInterval*>&);
 };
 } // end anonymous namespace
 
 char RAGreedy::ID = 0;
 
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
 FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
 
-RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) {
+  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
@@ -166,6 +236,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LiveIntervals>();
   AU.addRequired<SlotIndexes>();
   AU.addPreserved<SlotIndexes>();
+  AU.addRequired<LiveDebugVariables>();
+  AU.addPreserved<LiveDebugVariables>();
   if (StrongPHIElim)
     AU.addRequiredID(StrongPHIEliminationID);
   AU.addRequiredTransitive<RegisterCoalescer>();
@@ -185,9 +257,49 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+
+//===----------------------------------------------------------------------===//
+//                     LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
+  // LRE itself will remove from SlotIndexes and parent basic block.
+  VRM->RemoveMachineInstrFromMaps(MI);
+}
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+  if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
+    unassign(LIS->getInterval(VirtReg), PhysReg);
+    return true;
+  }
+  // Unassigned virtreg is probably in the priority queue.
+  // RegAllocBase will erase it after dequeueing.
+  return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+  unsigned PhysReg = VRM->getPhys(VirtReg);
+  if (!PhysReg)
+    return;
+
+  // Register is assigned, put it back on the queue for reassignment.
+  LiveInterval &LI = LIS->getInterval(VirtReg);
+  unassign(LI, PhysReg);
+  enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+  // LRE may clone a virtual register because dead code elimination causes it to
+  // be split into connected components. Ensure that the new register gets the
+  // same stage as the parent.
+  LRStage.grow(New);
+  LRStage[New] = LRStage[Old];
+}
+
 void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
-  Generation.clear();
+  LRStage.clear();
+  GlobalCand.clear();
   RegAllocBase::releaseMemory();
 }
 
@@ -198,20 +310,26 @@ void RAGreedy::enqueue(LiveInterval *LI) {
   const unsigned Reg = LI->reg;
   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
          "Can only enqueue virtual registers");
-  const unsigned Hint = VRM->getRegAllocPref(Reg);
   unsigned Prio;
 
-  Generation.grow(Reg);
-  if (++Generation[Reg] == 1)
-    // 1st generation ranges are handled first, long -> short.
+  LRStage.grow(Reg);
+  if (LRStage[Reg] == RS_New)
+    LRStage[Reg] = RS_First;
+
+  if (LRStage[Reg] == RS_Second)
+    // Unsplit ranges that couldn't be allocated immediately are deferred until
+    // everything else has been allocated. Long ranges are allocated last so
+    // they are split against realistic interference.
+    Prio = (1u << 31) - Size;
+  else {
+    // Everything else is allocated in long->short order. Long ranges that don't
+    // fit should be spilled ASAP so they don't create interference.
     Prio = (1u << 31) + Size;
-  else
-    // Repeat offenders are handled second, short -> long
-    Prio = (1u << 30) - Size;
 
-  // Boost ranges that have a physical register hint.
-  if (TargetRegisterInfo::isPhysicalRegister(Hint))
-    Prio |= (1u << 30);
+    // Boost ranges that have a physical register hint.
+    if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+      Prio |= (1u << 30);
+  }
 
   Queue.push(std::make_pair(Prio, Reg));
 }
@@ -224,97 +342,34 @@ LiveInterval *RAGreedy::dequeue() {
   return LI;
 }
 
+
 //===----------------------------------------------------------------------===//
-//                         Register Reassignment
+//                            Direct Assignment
 //===----------------------------------------------------------------------===//
 
-// Check interference without using the cache.
-bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg,
-                                         unsigned PhysReg) {
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
-    LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]);
-    if (subQ.checkInterference())
-      return true;
-  }
-  return false;
-}
-
-/// getSingleInterference - Return the single interfering virtual register
-/// assigned to PhysReg. Return 0 if more than one virtual register is
-/// interfering.
-LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
-                                              unsigned PhysReg) {
-  // Check physreg and aliases.
-  LiveInterval *Interference = 0;
-  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
-    if (Q.checkInterference()) {
-      if (Interference)
-        return 0;
-      if (Q.collectInterferingVRegs(2) > 1)
-        return 0;
-      Interference = Q.interferingVRegs().front();
-    }
-  }
-  return Interference;
-}
-
-// Attempt to reassign this virtual register to a different physical register.
-//
-// FIXME: we are not yet caching these "second-level" interferences discovered
-// in the sub-queries. These interferences can change with each call to
-// selectOrSplit. However, we could implement a "may-interfere" cache that
-// could be conservatively dirtied when we reassign or split.
-//
-// FIXME: This may result in a lot of alias queries. We could summarize alias
-// live intervals in their parent register's live union, but it's messy.
-bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
-                            unsigned WantedPhysReg) {
-  assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) &&
-         "Can only reassign virtual registers");
-  assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) &&
-         "inconsistent phys reg assigment");
-
-  AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs);
-  while (unsigned PhysReg = Order.next()) {
-    // Don't reassign to a WantedPhysReg alias.
-    if (TRI->regsOverlap(PhysReg, WantedPhysReg))
-      continue;
-
-    if (checkUncachedInterference(InterferingVReg, PhysReg))
-      continue;
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+                             AllocationOrder &Order,
+                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  Order.rewind();
+  unsigned PhysReg;
+  while ((PhysReg = Order.next()))
+    if (!checkPhysRegInterference(VirtReg, PhysReg))
+      break;
+  if (!PhysReg || Order.isHint(PhysReg))
+    return PhysReg;
 
-    // Reassign the interfering virtual reg to this physical reg.
-    unsigned OldAssign = VRM->getPhys(InterferingVReg.reg);
-    DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " <<
-          TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n');
-    unassign(InterferingVReg, OldAssign);
-    assign(InterferingVReg, PhysReg);
-    ++NumReassigned;
-    return true;
-  }
-  return false;
-}
+  // PhysReg is available. Try to evict interference from a cheaper alternative.
+  unsigned Cost = TRI->getCostPerUse(PhysReg);
 
-/// tryReassign - Try to reassign a single interference to a different physreg.
-/// @param  VirtReg Currently unassigned virtual register.
-/// @param  Order   Physregs to try.
-/// @return         Physreg to assign VirtReg, or 0.
-unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order,
-                               SmallVectorImpl<LiveInterval*> &NewVRegs){
-  NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
+  // Most registers have 0 additional cost.
+  if (!Cost)
+    return PhysReg;
 
-  Order.rewind();
-  while (unsigned PhysReg = Order.next()) {
-    LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
-    if (!InterferingVReg)
-      continue;
-    if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg))
-      continue;
-    if (reassignVReg(*InterferingVReg, PhysReg))
-      return PhysReg;
-  }
-  return 0;
+  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+               << '\n');
+  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+  return CheapReg ? CheapReg : PhysReg;
 }
 
 
@@ -323,22 +378,24 @@ unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order,
 //===----------------------------------------------------------------------===//
 
 /// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted. Set maxWeight to the maximal spill weight of an interference.
+/// be evicted.
+/// Return false if any interference is heavier than MaxWeight.
+/// On return, set MaxWeight to the maximal spill weight of an interference.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                    unsigned Size, float &MaxWeight) {
+                                    float &MaxWeight) {
   float Weight = 0;
   for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
-    // If there is 10 or more interferences, chances are one is smaller.
-    if (Q.collectInterferingVRegs(10) >= 10)
+    // If there is 10 or more interferences, chances are one is heavier.
+    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
       return false;
 
-    // CHeck if any interfering live range is shorter than VirtReg.
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *Intf = Q.interferingVRegs()[i];
+    // Check if any interfering live range is heavier than MaxWeight.
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
         return false;
-      if (Intf->getSize() <= Size)
+      if (Intf->weight >= MaxWeight)
         return false;
       Weight = std::max(Weight, Intf->weight);
     }
@@ -353,25 +410,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// @return         Physreg to assign VirtReg, or 0.
 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             AllocationOrder &Order,
-                            SmallVectorImpl<LiveInterval*> &NewVRegs){
+                            SmallVectorImpl<LiveInterval*> &NewVRegs,
+                            unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
-  // We can only evict interference if all interfering registers are virtual and
-  // longer than VirtReg.
-  const unsigned Size = VirtReg.getSize();
-
   // Keep track of the lightest single interference seen so far.
-  float BestWeight = 0;
+  float BestWeight = VirtReg.weight;
   unsigned BestPhys = 0;
 
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
-    float Weight = 0;
-    if (!canEvictInterference(VirtReg, PhysReg, Size, Weight))
+    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+      continue;
+    // The first use of a register in a function has cost 1.
+    if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
+      continue;
+
+    float Weight = BestWeight;
+    if (!canEvictInterference(VirtReg, PhysReg, Weight))
       continue;
 
     // This is an eviction candidate.
-    DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = "
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
                  << Weight << '\n');
     if (BestPhys && Weight >= BestWeight)
       continue;
@@ -406,201 +466,228 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
 //                              Region Splitting
 //===----------------------------------------------------------------------===//
 
-/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints
-/// when considering interference from PhysReg. Also compute an optimistic local
-/// cost of this interference pattern.
-///
-/// The final cost of a split is the local cost + global cost of preferences
-/// broken by SpillPlacement.
-///
-float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+                                   float &Cost) {
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
   // Reset interference dependent info.
-  SpillConstraints.resize(SA->LiveBlocks.size());
-  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
-    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+  SplitConstraints.resize(UseBlocks.size());
+  float StaticCost = 0;
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
     BC.Number = BI.MBB->getNumber();
-    BC.Entry = (BI.Uses && BI.LiveIn) ?
-      SpillPlacement::PrefReg : SpillPlacement::DontCare;
-    BC.Exit = (BI.Uses && BI.LiveOut) ?
-      SpillPlacement::PrefReg : SpillPlacement::DontCare;
-    BI.OverlapEntry = BI.OverlapExit = false;
-  }
+    Intf.moveToBlock(BC.Number);
+    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
 
-  // Add interference info from each PhysReg alias.
-  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
-    if (!query(VirtReg, *AI).checkInterference())
+    if (!Intf.hasInterference())
       continue;
-    LiveIntervalUnion::SegmentIter IntI =
-      PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
-    if (!IntI.valid())
-      continue;
-
-    // Determine which blocks have interference live in or after the last split
-    // point.
-    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-      SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
-      SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
-      SlotIndex Start, Stop;
-      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
 
-      // Skip interference-free blocks.
-      if (IntI.start() >= Stop)
-        continue;
-
-      // Is the interference live-in?
-      if (BI.LiveIn) {
-        IntI.advanceTo(Start);
-        if (!IntI.valid())
-          break;
-        if (IntI.start() <= Start)
-          BC.Entry = SpillPlacement::MustSpill;
-      }
+    // Number of spill code instructions to insert.
+    unsigned Ins = 0;
+
+    // Interference for the live-in value.
+    if (BI.LiveIn) {
+      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
+        BC.Entry = SpillPlacement::MustSpill, ++Ins;
+      else if (Intf.first() < BI.FirstUse)
+        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+      else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+        ++Ins;
+    }
 
-      // Is the interference overlapping the last split point?
-      if (BI.LiveOut) {
-        if (IntI.stop() < BI.LastSplitPoint)
-          IntI.advanceTo(BI.LastSplitPoint.getPrevSlot());
-        if (!IntI.valid())
-          break;
-        if (IntI.start() < Stop)
-          BC.Exit = SpillPlacement::MustSpill;
-      }
+    // Interference for the live-out value.
+    if (BI.LiveOut) {
+      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
+        BC.Exit = SpillPlacement::MustSpill, ++Ins;
+      else if (Intf.last() > BI.LastUse)
+        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+      else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def))
+        ++Ins;
     }
 
-    // Rewind iterator and check other interferences.
-    IntI.find(VirtReg.beginIndex());
-    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-      SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
-      SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
-      SlotIndex Start, Stop;
-      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+    // Accumulate the total frequency of inserted spill code.
+    if (Ins)
+      StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+  }
+  Cost = StaticCost;
 
-      // Skip interference-free blocks.
-      if (IntI.start() >= Stop)
-        continue;
+  // Add constraints for use-blocks. Note that these are the only constraints
+  // that may add a positive bias, it is downhill from here.
+  SpillPlacer->addConstraints(SplitConstraints);
+  return SpillPlacer->scanActiveBundles();
+}
 
-      // Handle transparent blocks with interference separately.
-      // Transparent blocks never incur any fixed cost.
-      if (BI.LiveThrough && !BI.Uses) {
-        IntI.advanceTo(Start);
-        if (!IntI.valid())
-          break;
-        if (IntI.start() >= Stop)
-          continue;
 
-        if (BC.Entry != SpillPlacement::MustSpill)
-          BC.Entry = SpillPlacement::PrefSpill;
-        if (BC.Exit != SpillPlacement::MustSpill)
-          BC.Exit = SpillPlacement::PrefSpill;
-        continue;
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+                                     ArrayRef<unsigned> Blocks) {
+  const unsigned GroupSize = 8;
+  SpillPlacement::BlockConstraint BCS[GroupSize];
+  unsigned TBS[GroupSize];
+  unsigned B = 0, T = 0;
+
+  for (unsigned i = 0; i != Blocks.size(); ++i) {
+    unsigned Number = Blocks[i];
+    Intf.moveToBlock(Number);
+
+    if (!Intf.hasInterference()) {
+      assert(T < GroupSize && "Array overflow");
+      TBS[T] = Number;
+      if (++T == GroupSize) {
+        SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+        T = 0;
       }
+      continue;
+    }
 
-      // Now we only have blocks with uses left.
-      // Check if the interference overlaps the uses.
-      assert(BI.Uses && "Non-transparent block without any uses");
-
-      // Check interference on entry.
-      if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) {
-        IntI.advanceTo(Start);
-        if (!IntI.valid())
-          break;
-        // Not live in, but before the first use.
-        if (IntI.start() < BI.FirstUse) {
-          BC.Entry = SpillPlacement::PrefSpill;
-          // If the block contains a kill from an earlier split, never split
-          // again in the same block.
-          if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill))
-            BC.Entry = SpillPlacement::MustSpill;
-        }
-      }
+    assert(B < GroupSize && "Array overflow");
+    BCS[B].Number = Number;
+
+    // Interference for the live-in value.
+    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+      BCS[B].Entry = SpillPlacement::MustSpill;
+    else
+      BCS[B].Entry = SpillPlacement::PrefSpill;
+
+    // Interference for the live-out value.
+    if (Intf.last() >= SA->getLastSplitPoint(Number))
+      BCS[B].Exit = SpillPlacement::MustSpill;
+    else
+      BCS[B].Exit = SpillPlacement::PrefSpill;
+
+    if (++B == GroupSize) {
+      ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+      SpillPlacer->addConstraints(Array);
+      B = 0;
+    }
+  }
 
-      // Does interference overlap the uses in the entry segment
-      // [FirstUse;Kill)?
-      if (BI.LiveIn && !BI.OverlapEntry) {
-        IntI.advanceTo(BI.FirstUse);
-        if (!IntI.valid())
-          break;
-        // A live-through interval has no kill.
-        // Check [FirstUse;LastUse) instead.
-        if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
-          BI.OverlapEntry = true;
-      }
+  ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
+  SpillPlacer->addConstraints(Array);
+  SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+}
 
-      // Does interference overlap the uses in the exit segment [Def;LastUse)?
-      if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) {
-        IntI.advanceTo(BI.Def);
-        if (!IntI.valid())
-          break;
-        if (IntI.start() < BI.LastUse)
-          BI.OverlapExit = true;
-      }
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
+                          InterferenceCache::Cursor Intf) {
+  // Keep track of through blocks that have not been added to SpillPlacer.
+  BitVector Todo = SA->getThroughBlocks();
+  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+  unsigned AddedTo = 0;
+#ifndef NDEBUG
+  unsigned Visited = 0;
+#endif
 
-      // Check interference on exit.
-      if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) {
-        // Check interference between LastUse and Stop.
-        if (BC.Exit != SpillPlacement::PrefSpill) {
-          IntI.advanceTo(BI.LastUse);
-          if (!IntI.valid())
-            break;
-          if (IntI.start() < Stop) {
-            BC.Exit = SpillPlacement::PrefSpill;
-            // Avoid splitting twice in the same block.
-            if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def))
-              BC.Exit = SpillPlacement::MustSpill;
-          }
-        }
+  for (;;) {
+    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+    if (NewBundles.empty())
+      break;
+    // Find new through blocks in the periphery of PrefRegBundles.
+    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+      unsigned Bundle = NewBundles[i];
+      // Look at all blocks connected to Bundle in the full graph.
+      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+           I != E; ++I) {
+        unsigned Block = *I;
+        if (!Todo.test(Block))
+          continue;
+        Todo.reset(Block);
+        // This is a new through block. Add it to SpillPlacer later.
+        ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+        ++Visited;
+#endif
       }
     }
+    // Any new blocks to add?
+    if (ActiveBlocks.size() > AddedTo) {
+      ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo],
+                             ActiveBlocks.size() - AddedTo);
+      addThroughConstraints(Intf, Add);
+      AddedTo = ActiveBlocks.size();
+    }
+    // Perhaps iterating can enable more bundles?
+    SpillPlacer->iterate();
   }
+  DEBUG(dbgs() << ", v=" << Visited);
+}
 
-  // Accumulate a local cost of this interference pattern.
-  float LocalCost = 0;
-  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
-    if (!BI.Uses)
-      continue;
-    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
-    unsigned Inserts = 0;
-
-    // Do we need spill code for the entry segment?
-    if (BI.LiveIn)
-      Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg;
-
-    // For the exit segment?
-    if (BI.LiveOut)
-      Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg;
-
-    // The local cost of spill code in this block is the block frequency times
-    // the number of spill instructions inserted.
-    if (Inserts)
-      LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB);
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+  float Cost = 0;
+  const LiveInterval &LI = SA->getParent();
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    unsigned Number = BI.MBB->getNumber();
+    // We normally only need one spill instruction - a load or a store.
+    Cost += SpillPlacer->getBlockFrequency(Number);
+
+    // Unless the value is redefined in the block.
+    if (BI.LiveIn && BI.LiveOut) {
+      SlotIndex Start, Stop;
+      tie(Start, Stop) = Indexes->getMBBRange(Number);
+      LiveInterval::const_iterator I = LI.find(Start);
+      assert(I != LI.end() && "Expected live-in value");
+      // Is there a different live-out value? If so, we need an extra spill
+      // instruction.
+      if (I->end < Stop)
+        Cost += SpillPlacer->getBlockFrequency(Number);
+    }
   }
-  DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = "
-               << LocalCost << '\n');
-  return LocalCost;
+  return Cost;
 }
 
 /// calcGlobalSplitCost - Return the global split cost of following the split
 /// pattern in LiveBundles. This cost should be added to the local cost of the
-/// interference pattern in SpillConstraints.
+/// interference pattern in SplitConstraints.
 ///
-float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
+                                    InterferenceCache::Cursor Intf) {
   float GlobalCost = 0;
-  for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) {
-    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
-    unsigned Inserts = 0;
-    // Broken entry preference?
-    Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] !=
-                 (BC.Entry == SpillPlacement::PrefReg);
-    // Broken exit preference?
-    Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] !=
-                 (BC.Exit == SpillPlacement::PrefReg);
-    if (Inserts)
-      GlobalCost +=
-        Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB);
+  const BitVector &LiveBundles = Cand.LiveBundles;
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+    unsigned Ins = 0;
+
+    if (BI.LiveIn)
+      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+    if (BI.LiveOut)
+      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+    if (Ins)
+      GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+  }
+
+  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+    unsigned Number = Cand.ActiveBlocks[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+    if (!RegIn && !RegOut)
+      continue;
+    if (RegIn && RegOut) {
+      // We need double spill code if this block has interference.
+      Intf.moveToBlock(Number);
+      if (Intf.hasInterference())
+        GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+      continue;
+    }
+    // live-in / stack-out or stack-in live-out.
+    GlobalCost += SpillPlacer->getBlockFrequency(Number);
   }
-  DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n');
   return GlobalCost;
 }
 
@@ -611,113 +698,74 @@ float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
 /// avoiding interference. The 'stack' interval is the complement constructed by
 /// SplitEditor. It will contain the rest.
 ///
-void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
-                                 const BitVector &LiveBundles,
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
+                                 GlobalSplitCandidate &Cand,
                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  const BitVector &LiveBundles = Cand.LiveBundles;
+
   DEBUG({
-    dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+    dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
            << " with bundles";
     for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
       dbgs() << " EB#" << i;
     dbgs() << ".\n";
   });
 
-  // First compute interference ranges in the live blocks.
-  typedef std::pair<SlotIndex, SlotIndex> IndexPair;
-  SmallVector<IndexPair, 8> InterferenceRanges;
-  InterferenceRanges.resize(SA->LiveBlocks.size());
-  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
-    if (!query(VirtReg, *AI).checkInterference())
-      continue;
-    LiveIntervalUnion::SegmentIter IntI =
-      PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
-    if (!IntI.valid())
-      continue;
-    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-      const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
-      IndexPair &IP = InterferenceRanges[i];
-      SlotIndex Start, Stop;
-      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
-      // Skip interference-free blocks.
-      if (IntI.start() >= Stop)
-        continue;
-
-      // First interference in block.
-      if (BI.LiveIn) {
-        IntI.advanceTo(Start);
-        if (!IntI.valid())
-          break;
-        if (IntI.start() >= Stop)
-          continue;
-        if (!IP.first.isValid() || IntI.start() < IP.first)
-          IP.first = IntI.start();
-      }
-
-      // Last interference in block.
-      if (BI.LiveOut) {
-        IntI.advanceTo(Stop);
-        if (!IntI.valid() || IntI.start() >= Stop)
-          --IntI;
-        if (IntI.stop() <= Start)
-          continue;
-        if (!IP.second.isValid() || IntI.stop() > IP.second)
-          IP.second = IntI.stop();
-      }
-    }
-  }
-
-  SmallVector<LiveInterval*, 4> SpillRegs;
-  LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
-  SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+  InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg);
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+  SE->reset(LREdit);
 
   // Create the main cross-block interval.
-  SE.openIntv();
+  const unsigned MainIntv = SE->openIntv();
 
   // First add all defs that are live out of a block.
-  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
     bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
     bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
 
+    // Create separate intervals for isolated blocks with multiple uses.
+    if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) {
+      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+      SE->splitSingleBlock(BI);
+      SE->selectIntv(MainIntv);
+      continue;
+    }
+
     // Should the register be live out?
     if (!BI.LiveOut || !RegOut)
       continue;
 
-    IndexPair &IP = InterferenceRanges[i];
     SlotIndex Start, Stop;
     tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
-
+    Intf.moveToBlock(BI.MBB->getNumber());
     DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
                  << Bundles->getBundle(BI.MBB->getNumber(), 1)
-                 << " intf [" << IP.first << ';' << IP.second << ')');
+                 << " [" << Start << ';'
+                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
+                 << ") intf [" << Intf.first() << ';' << Intf.last() << ')');
 
     // The interference interval should either be invalid or overlap MBB.
-    assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
-    assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+    assert((!Intf.hasInterference() || Intf.first() < Stop)
+           && "Bad interference");
+    assert((!Intf.hasInterference() || Intf.last() > Start)
+           && "Bad interference");
 
     // Check interference leaving the block.
-    if (!IP.second.isValid()) {
+    if (!Intf.hasInterference()) {
       // Block is interference-free.
       DEBUG(dbgs() << ", no interference");
-      if (!BI.Uses) {
-        assert(BI.LiveThrough && "No uses, but not live through block?");
-        // Block is live-through without interference.
-        DEBUG(dbgs() << ", no uses"
-                     << (RegIn ? ", live-through.\n" : ", stack in.\n"));
-        if (!RegIn)
-          SE.enterIntvAtEnd(*BI.MBB);
-        continue;
-      }
       if (!BI.LiveThrough) {
         DEBUG(dbgs() << ", not live-through.\n");
-        SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+        SE->useIntv(SE->enterIntvBefore(BI.Def), Stop);
         continue;
       }
       if (!RegIn) {
         // Block is live-through, but entry bundle is on the stack.
         // Reload just before the first use.
         DEBUG(dbgs() << ", not live-in, enter before first use.\n");
-        SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+        SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
         continue;
       }
       DEBUG(dbgs() << ", live-through.\n");
@@ -725,53 +773,45 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
     }
 
     // Block has interference.
-    DEBUG(dbgs() << ", interference to " << IP.second);
+    DEBUG(dbgs() << ", interference to " << Intf.last());
 
-    if (!BI.LiveThrough && IP.second <= BI.Def) {
+    if (!BI.LiveThrough && Intf.last() <= BI.Def) {
       // The interference doesn't reach the outgoing segment.
       DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
-      SE.useIntv(BI.Def, Stop);
+      SE->useIntv(BI.Def, Stop);
       continue;
     }
 
-
-    if (!BI.Uses) {
-      // No uses in block, avoid interference by reloading as late as possible.
-      DEBUG(dbgs() << ", no uses.\n");
-      SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
-      assert(SegStart >= IP.second && "Couldn't avoid interference");
-      continue;
-    }
-
-    if (IP.second.getBoundaryIndex() < BI.LastUse) {
+    SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
+    if (Intf.last().getBoundaryIndex() < BI.LastUse) {
       // There are interference-free uses at the end of the block.
       // Find the first use that can get the live-out register.
       SmallVectorImpl<SlotIndex>::const_iterator UI =
         std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
-                         IP.second.getBoundaryIndex());
+                         Intf.last().getBoundaryIndex());
       assert(UI != SA->UseSlots.end() && "Couldn't find last use");
       SlotIndex Use = *UI;
       assert(Use <= BI.LastUse && "Couldn't find last use");
       // Only attempt a split befroe the last split point.
-      if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+      if (Use.getBaseIndex() <= LastSplitPoint) {
         DEBUG(dbgs() << ", free use at " << Use << ".\n");
-        SlotIndex SegStart = SE.enterIntvBefore(Use);
-        assert(SegStart >= IP.second && "Couldn't avoid interference");
-        assert(SegStart < BI.LastSplitPoint && "Impossible split point");
-        SE.useIntv(SegStart, Stop);
+        SlotIndex SegStart = SE->enterIntvBefore(Use);
+        assert(SegStart >= Intf.last() && "Couldn't avoid interference");
+        assert(SegStart < LastSplitPoint && "Impossible split point");
+        SE->useIntv(SegStart, Stop);
         continue;
       }
     }
 
     // Interference is after the last use.
     DEBUG(dbgs() << " after last use.\n");
-    SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
-    assert(SegStart >= IP.second && "Couldn't avoid interference");
+    SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB);
+    assert(SegStart >= Intf.last() && "Couldn't avoid interference");
   }
 
   // Now all defs leading to live bundles are handled, do everything else.
-  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
-    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
     bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
     bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
 
@@ -780,152 +820,207 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
       continue;
 
     // We have an incoming register. Check for interference.
-    IndexPair &IP = InterferenceRanges[i];
     SlotIndex Start, Stop;
     tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
-
+    Intf.moveToBlock(BI.MBB->getNumber());
     DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
-                 << " -> BB#" << BI.MBB->getNumber());
+                 << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';'
+                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
+                 << ')');
 
     // Check interference entering the block.
-    if (!IP.first.isValid()) {
+    if (!Intf.hasInterference()) {
       // Block is interference-free.
       DEBUG(dbgs() << ", no interference");
-      if (!BI.Uses) {
-        assert(BI.LiveThrough && "No uses, but not live through block?");
-        // Block is live-through without interference.
-        if (RegOut) {
-          DEBUG(dbgs() << ", no uses, live-through.\n");
-          SE.useIntv(Start, Stop);
-        } else {
-          DEBUG(dbgs() << ", no uses, stack-out.\n");
-          SE.leaveIntvAtTop(*BI.MBB);
-        }
-        continue;
-      }
       if (!BI.LiveThrough) {
         DEBUG(dbgs() << ", killed in block.\n");
-        SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill));
+        SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill));
         continue;
       }
       if (!RegOut) {
+        SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
         // Block is live-through, but exit bundle is on the stack.
         // Spill immediately after the last use.
-        if (BI.LastUse < BI.LastSplitPoint) {
+        if (BI.LastUse < LastSplitPoint) {
           DEBUG(dbgs() << ", uses, stack-out.\n");
-          SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse));
+          SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
           continue;
         }
         // The last use is after the last split point, it is probably an
         // indirect jump.
         DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
-                     << BI.LastSplitPoint << ", stack-out.\n");
-        SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint);
-        SE.useIntv(Start, SegEnd);
+                     << LastSplitPoint << ", stack-out.\n");
+        SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint);
+        SE->useIntv(Start, SegEnd);
         // Run a double interval from the split to the last use.
         // This makes it possible to spill the complement without affecting the
         // indirect branch.
-        SE.overlapIntv(SegEnd, BI.LastUse);
+        SE->overlapIntv(SegEnd, BI.LastUse);
         continue;
       }
       // Register is live-through.
       DEBUG(dbgs() << ", uses, live-through.\n");
-      SE.useIntv(Start, Stop);
+      SE->useIntv(Start, Stop);
       continue;
     }
 
     // Block has interference.
-    DEBUG(dbgs() << ", interference from " << IP.first);
+    DEBUG(dbgs() << ", interference from " << Intf.first());
 
-    if (!BI.LiveThrough && IP.first >= BI.Kill) {
+    if (!BI.LiveThrough && Intf.first() >= BI.Kill) {
       // The interference doesn't reach the outgoing segment.
       DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
-      SE.useIntv(Start, BI.Kill);
+      SE->useIntv(Start, BI.Kill);
       continue;
     }
 
-    if (!BI.Uses) {
-      // No uses in block, avoid interference by spilling as soon as possible.
-      DEBUG(dbgs() << ", no uses.\n");
-      SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
-      assert(SegEnd <= IP.first && "Couldn't avoid interference");
-      continue;
-    }
-    if (IP.first.getBaseIndex() > BI.FirstUse) {
+    if (Intf.first().getBaseIndex() > BI.FirstUse) {
       // There are interference-free uses at the beginning of the block.
       // Find the last use that can get the register.
       SmallVectorImpl<SlotIndex>::const_iterator UI =
         std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
-                         IP.first.getBaseIndex());
+                         Intf.first().getBaseIndex());
       assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
       SlotIndex Use = (--UI)->getBoundaryIndex();
       DEBUG(dbgs() << ", free use at " << *UI << ".\n");
-      SlotIndex SegEnd = SE.leaveIntvAfter(Use);
-      assert(SegEnd <= IP.first && "Couldn't avoid interference");
-      SE.useIntv(Start, SegEnd);
+      SlotIndex SegEnd = SE->leaveIntvAfter(Use);
+      assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
+      SE->useIntv(Start, SegEnd);
       continue;
     }
 
     // Interference is before the first use.
     DEBUG(dbgs() << " before first use.\n");
-    SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
-    assert(SegEnd <= IP.first && "Couldn't avoid interference");
+    SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB);
+    assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
   }
 
-  SE.closeIntv();
+  // Handle live-through blocks.
+  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+    unsigned Number = Cand.ActiveBlocks[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+    DEBUG(dbgs() << "Live through BB#" << Number << '\n');
+    if (RegIn && RegOut) {
+      Intf.moveToBlock(Number);
+      if (!Intf.hasInterference()) {
+        SE->useIntv(Indexes->getMBBStartIdx(Number),
+                    Indexes->getMBBEndIdx(Number));
+        continue;
+      }
+    }
+    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+    if (RegIn)
+      SE->leaveIntvAtTop(*MBB);
+    if (RegOut)
+      SE->enterIntvAtEnd(*MBB);
+  }
 
-  // FIXME: Should we be more aggressive about splitting the stack region into
-  // per-block segments? The current approach allows the stack region to
-  // separate into connected components. Some components may be allocatable.
-  SE.finish();
   ++NumGlobalSplits;
 
-  if (VerifyEnabled) {
-    MF->verify(this, "After splitting live range around region");
+  SmallVector<unsigned, 8> IntvMap;
+  SE->finish(&IntvMap);
+  LRStage.resize(MRI->getNumVirtRegs());
+  unsigned OrigBlocks = SA->getNumThroughBlocks() + SA->getUseBlocks().size();
+
+  // Sort out the new intervals created by splitting. We get four kinds:
+  // - Remainder intervals should not be split again.
+  // - Candidate intervals can be assigned to Cand.PhysReg.
+  // - Block-local splits are candidates for local splitting.
+  // - DCE leftovers should go back on the queue.
+  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+    unsigned Reg = LREdit.get(i)->reg;
+
+    // Ignore old intervals from DCE.
+    if (LRStage[Reg] != RS_New)
+      continue;
 
-#ifndef NDEBUG
-    // Make sure that at least one of the new intervals can allocate to PhysReg.
-    // That was the whole point of splitting the live range.
-    bool found = false;
-    for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E;
-         ++I)
-      if (!checkUncachedInterference(**I, PhysReg)) {
-        found = true;
-        break;
+    // Remainder interval. Don't try splitting again, spill if it doesn't
+    // allocate.
+    if (IntvMap[i] == 0) {
+      LRStage[Reg] = RS_Global;
+      continue;
+    }
+
+    // Main interval. Allow repeated splitting as long as the number of live
+    // blocks is strictly decreasing.
+    if (IntvMap[i] == MainIntv) {
+      if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+                     << " blocks as original.\n");
+        // Don't allow repeated splitting as a safe guard against looping.
+        LRStage[Reg] = RS_Global;
       }
-    assert(found && "No allocatable intervals after pointless splitting");
-#endif
+      continue;
+    }
+
+    // Other intervals are treated as new. This includes local intervals created
+    // for blocks with multiple uses, and anything created by DCE.
   }
+
+  if (VerifyEnabled)
+    MF->verify(this, "After splitting live range around region");
 }
 
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  BitVector LiveBundles, BestBundles;
-  float BestCost = 0;
-  unsigned BestReg = 0;
+  float BestCost = Hysteresis * calcSpillCost();
+  DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+  const unsigned NoCand = ~0u;
+  unsigned BestCand = NoCand;
+
   Order.rewind();
-  while (unsigned PhysReg = Order.next()) {
-    float Cost = calcInterferenceInfo(VirtReg, PhysReg);
-    if (BestReg && Cost >= BestCost)
+  for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
+    if (GlobalCand.size() <= Cand)
+      GlobalCand.resize(Cand+1);
+    GlobalCand[Cand].reset(PhysReg);
+
+    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
+    float Cost;
+    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+    if (!addSplitConstraints(Intf, Cost)) {
+      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+      continue;
+    }
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+    if (Cost >= BestCost) {
+      DEBUG({
+        if (BestCand == NoCand)
+          dbgs() << " worse than no bundles\n";
+        else
+          dbgs() << " worse than "
+                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+      });
       continue;
+    }
+    growRegion(GlobalCand[Cand], Intf);
+
+    SpillPlacer->finish();
 
-    SpillPlacer->placeSpills(SpillConstraints, LiveBundles);
     // No live bundles, defer to splitSingleBlocks().
-    if (!LiveBundles.any())
+    if (!GlobalCand[Cand].LiveBundles.any()) {
+      DEBUG(dbgs() << " no bundles.\n");
       continue;
+    }
 
-    Cost += calcGlobalSplitCost(LiveBundles);
-    if (!BestReg || Cost < BestCost) {
-      BestReg = PhysReg;
-      BestCost = Cost;
-      BestBundles.swap(LiveBundles);
+    Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
+    DEBUG({
+      dbgs() << ", total = " << Cost << " with bundles";
+      for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
+           i = GlobalCand[Cand].LiveBundles.find_next(i))
+        dbgs() << " EB#" << i;
+      dbgs() << ".\n";
+    });
+    if (Cost < BestCost) {
+      BestCand = Cand;
+      BestCost = Hysteresis * Cost; // Prevent rounding effects.
     }
   }
 
-  if (!BestReg)
+  if (BestCand == NoCand)
     return 0;
 
-  splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs);
+  splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
   return 0;
 }
 
@@ -942,8 +1037,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 ///
 void RAGreedy::calcGapWeights(unsigned PhysReg,
                               SmallVectorImpl<float> &GapWeight) {
-  assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
-  const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
   const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
   const unsigned NumGaps = Uses.size()-1;
 
@@ -1034,8 +1129,8 @@ unsigned RAGreedy::nextSplitPoint(unsigned i) {
 ///
 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
-  const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
 
   // Note that it is possible to have an interval that is live-in or live-out
   // while only covering a single block - A phi-def can use undef values from
@@ -1065,7 +1160,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   unsigned BestAfter = 0;
   float BestDiff = 0;
 
-  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB);
+  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
   SmallVector<float, 8> GapWeight;
 
   Order.rewind();
@@ -1130,13 +1225,13 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                               PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
         // Would this split be possible to allocate?
         // Never allocate all gaps, we wouldn't be making progress.
-        float Diff = EstWeight - MaxGap;
-        DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
-        if (Diff > 0) {
+        DEBUG(dbgs() << " w=" << EstWeight);
+        if (EstWeight * Hysteresis >= MaxGap) {
           Shrink = false;
+          float Diff = EstWeight - MaxGap;
           if (Diff > BestDiff) {
             DEBUG(dbgs() << " (best)");
-            BestDiff = Diff;
+            BestDiff = Hysteresis * Diff;
             BestBefore = SplitBefore;
             BestAfter = SplitAfter;
           }
@@ -1181,16 +1276,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                << '-' << Uses[BestAfter] << ", " << BestDiff
                << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
 
-  SmallVector<LiveInterval*, 4> SpillRegs;
-  LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
-  SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+  SE->reset(LREdit);
 
-  SE.openIntv();
-  SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]);
-  SlotIndex SegStop  = SE.leaveIntvAfter(Uses[BestAfter]);
-  SE.useIntv(SegStart, SegStop);
-  SE.closeIntv();
-  SE.finish();
+  SE->openIntv();
+  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
+  SE->useIntv(SegStart, SegStop);
+  SE->finish();
+  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local);
   ++NumLocalSplits;
 
   return 0;
@@ -1205,16 +1299,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                             SmallVectorImpl<LiveInterval*>&NewVRegs) {
-  SA->analyze(&VirtReg);
-
   // Local intervals are handled separately.
   if (LIS->intervalIsInOneMBB(VirtReg)) {
     NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+    SA->analyze(&VirtReg);
     return tryLocalSplit(VirtReg, Order, NewVRegs);
   }
 
   NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
 
+  // Don't iterate global splitting.
+  // Move straight to spilling if this range was produced by a global split.
+  if (getStage(VirtReg) >= RS_Global)
+    return 0;
+
+  SA->analyze(&VirtReg);
+
   // First try to split around a region spanning multiple blocks.
   unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
   if (PhysReg || !NewVRegs.empty())
@@ -1223,9 +1323,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
   // Then isolate blocks with multiple uses.
   SplitAnalysis::BlockPtrSet Blocks;
   if (SA->getMultiUseBlocks(Blocks)) {
-    SmallVector<LiveInterval*, 4> SpillRegs;
-    LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
-    SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks);
+    LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+    SE->reset(LREdit);
+    SE->splitSingleBlocks(Blocks);
+    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
     if (VerifyEnabled)
       MF->verify(this, "After splitting live range around basic blocks");
   }
@@ -1236,68 +1337,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
 
 
 //===----------------------------------------------------------------------===//
-//                                Spilling
-//===----------------------------------------------------------------------===//
-
-/// calcInterferenceWeight - Calculate the combined spill weight of
-/// interferences when assigning VirtReg to PhysReg.
-float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){
-  float Sum = 0;
-  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
-    Q.collectInterferingVRegs();
-    if (Q.seenUnspillableVReg())
-      return HUGE_VALF;
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i)
-      Sum += Q.interferingVRegs()[i]->weight;
-  }
-  return Sum;
-}
-
-/// trySpillInterferences - Try to spill interfering registers instead of the
-/// current one. Only do it if the accumulated spill weight is smaller than the
-/// current spill weight.
-unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg,
-                                         AllocationOrder &Order,
-                                     SmallVectorImpl<LiveInterval*> &NewVRegs) {
-  NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled);
-  unsigned BestPhys = 0;
-  float BestWeight = 0;
-
-  Order.rewind();
-  while (unsigned PhysReg = Order.next()) {
-    float Weight = calcInterferenceWeight(VirtReg, PhysReg);
-    if (Weight == HUGE_VALF || Weight >= VirtReg.weight)
-      continue;
-    if (!BestPhys || Weight < BestWeight)
-      BestPhys = PhysReg, BestWeight = Weight;
-  }
-
-  // No candidates found.
-  if (!BestPhys)
-    return 0;
-
-  // Collect all interfering registers.
-  SmallVector<LiveInterval*, 8> Spills;
-  for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
-    Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end());
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *VReg = Q.interferingVRegs()[i];
-      unassign(*VReg, *AI);
-    }
-  }
-
-  // Spill them all.
-  DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight "
-               << BestWeight << '\n');
-  for (unsigned i = 0, e = Spills.size(); i != e; ++i)
-    spiller().spill(Spills[i], NewVRegs, Spills);
-  return BestPhys;
-}
-
-
-//===----------------------------------------------------------------------===//
 //                            Main Entry Point
 //===----------------------------------------------------------------------===//
 
@@ -1305,12 +1344,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
-  while (unsigned PhysReg = Order.next()) {
-    if (!checkPhysRegInterference(VirtReg, PhysReg))
-      return PhysReg;
-  }
-
-  if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs))
+  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
     return PhysReg;
 
   if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
@@ -1321,25 +1355,29 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
   // The first time we see a live range, don't try to split or spill.
   // Wait until the second time, when all smaller ranges have been allocated.
   // This gives a better picture of the interference to split around.
-  if (Generation[VirtReg.reg] == 1) {
+  LiveRangeStage Stage = getStage(VirtReg);
+  if (Stage == RS_First) {
+    LRStage[VirtReg.reg] = RS_Second;
+    DEBUG(dbgs() << "wait for second round\n");
     NewVRegs.push_back(&VirtReg);
     return 0;
   }
 
+  assert(Stage < RS_Spill && "Cannot allocate after spilling");
+
   // Try splitting VirtReg or interferences.
   unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
   if (PhysReg || !NewVRegs.empty())
     return PhysReg;
 
-  // Try to spill another interfering reg with less spill weight.
-  PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs);
-  if (PhysReg)
-    return PhysReg;
-
   // Finally spill VirtReg itself.
   NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
-  SmallVector<LiveInterval*, 1> pendingSpills;
-  spiller().spill(&VirtReg, NewVRegs, pendingSpills);
+  LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+  spiller().spill(LRE);
+  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
+
+  if (VerifyEnabled)
+    MF->verify(this, "After spilling");
 
   // The live virtual register requesting allocation was spilled, so tell
   // the caller not to allocate anything during this round.
@@ -1366,6 +1404,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   SpillPlacer = &getAnalysis<SpillPlacement>();
 
   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+  LRStage.clear();
+  LRStage.resize(MRI->getNumVirtRegs());
+  IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
 
   allocatePhysRegs();
   addMBBLiveIns(MF);
@@ -1377,6 +1419,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
     VRM->rewrite(Indexes);
   }
 
+  // Write out new DBG_VALUE instructions.
+  getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
   // The pass output is in VirtRegMap. Release all the transient data.
   releaseMemory();
 
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index b959878..5ef88cb 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -13,6 +13,7 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "LiveDebugVariables.h"
+#include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
 #include "Spiller.h"
@@ -39,7 +40,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
-#include <set>
 #include <queue>
 #include <memory>
 #include <cmath>
@@ -66,6 +66,11 @@ TrivCoalesceEnds("trivial-coalesce-ends",
                   cl::desc("Attempt trivial coalescing of interval ends"),
                   cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+AvoidWAWHazard("avoid-waw-hazard",
+               cl::desc("Avoid write-write hazards for some register classes"),
+               cl::init(false), cl::Hidden);
+
 static RegisterRegAlloc
 linearscanRegAlloc("linearscan", "linear scan register allocator",
                    createLinearScanRegisterAllocator);
@@ -109,6 +114,7 @@ namespace {
       if (NumRecentlyUsedRegs > 0)
         RecentRegs.resize(NumRecentlyUsedRegs, 0);
       RecentNext = RecentRegs.begin();
+      avoidWAW_ = 0;
     }
 
     typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
@@ -179,6 +185,9 @@ namespace {
     SmallVector<unsigned, 4> RecentRegs;
     SmallVector<unsigned, 4>::iterator RecentNext;
 
+    // Last write-after-write register written.
+    unsigned avoidWAW_;
+
     // Record that we just picked this register.
     void recordRecentlyUsed(unsigned reg) {
       assert(reg != 0 && "Recently used register is NOREG!");
@@ -226,8 +235,8 @@ namespace {
 
     // Determine if we skip this register due to its being recently used.
     bool isRecentlyUsed(unsigned reg) const {
-      return std::find(RecentRegs.begin(), RecentRegs.end(), reg) !=
-             RecentRegs.end();
+      return reg == avoidWAW_ ||
+       std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
     }
 
   private:
@@ -374,7 +383,7 @@ namespace {
             dbgs() << str << " intervals:\n";
 
           for (; i != e; ++i) {
-            dbgs() << "\t" << *i->first << " -> ";
+            dbgs() << '\t' << *i->first << " -> ";
 
             unsigned reg = i->first->reg;
             if (TargetRegisterInfo::isVirtualRegister(reg))
@@ -389,7 +398,7 @@ namespace {
 }
 
 INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
-                "Linear Scan Register Allocator", false, false)
+                      "Linear Scan Register Allocator", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
 INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
@@ -400,7 +409,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
-                "Linear Scan Register Allocator", false, false)
+                    "Linear Scan Register Allocator", false, false)
 
 void RALinScan::ComputeRelatedRegClasses() {
   // First pass, add all reg classes to the union, and determine at least one
@@ -458,7 +467,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
   const LiveRange &range = cur.ranges.front();
 
   VNInfo *vni = range.valno;
-  if (vni->isUnused())
+  if (vni->isUnused() || !vni->def.isValid())
     return Reg;
 
   unsigned CandReg;
@@ -571,7 +580,7 @@ void RALinScan::initIntervalSets()
 
   for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
     if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
-      if (!i->second->empty()) {
+      if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
         mri_->setPhysRegUsed(i->second->reg);
         fixed_.push_back(std::make_pair(i->second, i->second->begin()));
       }
@@ -791,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights,
   // register class we are trying to allocate. Then add the weight to all
   // sub-registers of the super-register even if they are not aliases.
   // e.g. allocating for GR32, bh is not used, updating bl spill weight.
-  //      bl should get the same spill weight otherwise it will be choosen
+  //      bl should get the same spill weight otherwise it will be chosen
   //      as a spill candidate since spilling bh doesn't make ebx available.
   for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
     for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
@@ -993,7 +1002,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
   if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
     VNInfo *vni = cur->begin()->valno;
-    if (!vni->isUnused()) {
+    if (!vni->isUnused() && vni->def.isValid()) {
       MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
       if (CopyMI && CopyMI->isCopy()) {
         unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
@@ -1109,11 +1118,18 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // list.
   if (physReg) {
     DEBUG(dbgs() <<  tri_->getName(physReg) << '\n');
+    assert(RC->contains(physReg) && "Invalid candidate");
     vrm_->assignVirt2Phys(cur->reg, physReg);
     addRegUse(physReg);
     active_.push_back(std::make_pair(cur, cur->begin()));
     handled_.push_back(cur);
 
+    // Remember physReg for avoiding a write-after-write hazard in the next
+    // instruction.
+    if (AvoidWAWHazard &&
+        tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
+      avoidWAW_ = physReg;
+
     // "Upgrade" the physical register since it has been allocated.
     UpgradeRegister(physReg);
     if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
@@ -1229,8 +1245,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // linearscan.
   if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
     DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
-    SmallVector<LiveInterval*, 8> spillIs, added;
-    spiller_->spill(cur, added, spillIs);
+    SmallVector<LiveInterval*, 8> added;
+    LiveRangeEdit LRE(*cur, added);
+    spiller_->spill(LRE);
 
     std::sort(added.begin(), added.end(), LISorter());
     if (added.empty())
@@ -1306,7 +1323,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
     if (sli->beginIndex() < earliestStart)
       earliestStart = sli->beginIndex();
-    spiller_->spill(sli, added, spillIs);
+    LiveRangeEdit LRE(*sli, added, 0, &spillIs);
+    spiller_->spill(LRE);
     spilled.insert(sli->reg);
   }
 
@@ -1442,7 +1460,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
     if (reservedRegs_.test(Reg))
       continue;
     // Skip recently allocated registers.
-    if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+    if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
       FreeReg = Reg;
       if (FreeReg < inactiveCounts.size())
         FreeRegInactiveCount = inactiveCounts[FreeReg];
@@ -1473,7 +1491,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
     if (reservedRegs_.test(Reg))
       continue;
     if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
-        FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+        FreeRegInactiveCount < inactiveCounts[Reg] &&
+        (!SkipDGRegs || !isRecentlyUsed(Reg))) {
       FreeReg = Reg;
       FreeRegInactiveCount = inactiveCounts[Reg];
       if (FreeRegInactiveCount == MaxInactiveCount)
@@ -1524,12 +1543,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
       return Preference;
   }
 
-  if (!DowngradedRegs.empty()) {
-    unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
-                                      true);
-    if (FreeReg)
-      return FreeReg;
-  }
+  unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+                                    true);
+  if (FreeReg)
+    return FreeReg;
   return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
 }
 
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index ea0d1fe..1e1f1e0 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -534,10 +534,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
       vregsToAlloc.erase(vreg);
       const LiveInterval* spillInterval = &lis->getInterval(vreg);
       double oldWeight = spillInterval->weight;
-      SmallVector<LiveInterval*, 8> spillIs;
       rmf->rememberUseDefs(spillInterval);
       std::vector<LiveInterval*> newSpills =
-        lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+        lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
       addStackInterval(spillInterval, mri);
       rmf->rememberSpills(spillInterval, newSpills);
 
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index a2580b8..ebfe533 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -126,9 +126,10 @@ void RegScavenger::forward() {
     MBBI = MBB->begin();
     Tracking = true;
   } else {
-    assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+    assert(MBBI != MBB->end() && "Already past the end of the basic block!");
     MBBI = llvm::next(MBBI);
   }
+  assert(MBBI != MBB->end() && "Already at the end of the basic block!");
 
   MachineInstr *MI = MBBI;
 
@@ -241,12 +242,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
 
 /// getRegsAvailable - Return all available registers in the register class
 /// in Mask.
-void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
-                                    BitVector &Mask) {
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+  BitVector Mask(TRI->getNumRegs());
   for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
        I != E; ++I)
     if (!isAliasUsed(*I))
       Mask.set(*I);
+  return Mask;
 }
 
 /// findSurvivorReg - Return the candidate register that is unused for the
@@ -335,9 +337,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
   }
 
   // Try to find a register that's unused if there is one, as then we won't
-  // have to spill.
-  if ((Candidates & RegsAvailable).any())
-     Candidates &= RegsAvailable;
+  // have to spill. Search explicitly rather than masking out based on
+  // RegsAvailable, as RegsAvailable does not take aliases into account.
+  // That's what getRegsAvailable() is for.
+  BitVector Available = getRegsAvailable(RC);
+
+  if ((Candidates & Available).any())
+     Candidates &= Available;
 
   // Find the register whose use is furthest away.
   MachineBasicBlock::iterator UseMI;
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index cbfd5a2..c8de382 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix",
 
 static cl::opt<std::string>
 machineFuncsToRender("rmf-funcs",
-                     cl::desc("Coma seperated list of functions to render"
+                     cl::desc("Comma separated list of functions to render"
                               ", or \"*\"."),
                      cl::init(""), cl::Hidden);
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 3388889..1302395 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -472,7 +472,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
 #endif
 }
 
-/// AddPred - Updates the topological ordering to accomodate an edge
+/// AddPred - Updates the topological ordering to accommodate an edge
 /// to be added from SUnit X to SUnit Y.
 void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
   int UpperBound, LowerBound;
@@ -490,7 +490,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
   }
 }
 
-/// RemovePred - Updates the topological ordering to accomodate an
+/// RemovePred - Updates the topological ordering to accommodate an
 /// an edge to be removed from the specified node N from the predecessors
 /// of the current node M.
 void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index f17023e..67c209e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -371,7 +371,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
                 // will be overlapped by work done outside the current
                 // scheduling region.
                 Latency -= std::min(Latency, Count);
-                // Add the artifical edge.
+                // Add the artificial edge.
                 ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                     /*Reg=*/0, /*isNormalMemory=*/false,
                                     /*isMustAlias=*/false,
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 027f615..4b55a22 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -51,7 +51,8 @@ namespace llvm {
     /// If you want to override the dot attributes printed for a particular
     /// edge, override this method.
     static std::string getEdgeAttributes(const SUnit *Node,
-                                         SUnitIterator EI) {
+                                         SUnitIterator EI,
+                                         const ScheduleDAG *Graph) {
       if (EI.isArtificialDep())
         return "color=cyan,style=dashed";
       if (EI.isCtrlDep())
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9cc70a3..f427511 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -319,6 +319,10 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
   ((DAGCombiner*)DC)->AddToWorkList(N);
 }
 
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+  ((DAGCombiner*)DC)->removeFromWorkList(N);
+}
+
 SDValue TargetLowering::DAGCombinerInfo::
 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -1290,6 +1294,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
+/// return 0 or 1 depending on the carry flag.
+static bool isCarryMaterialization(SDValue V) {
+  if (V.getOpcode() != ISD::ADDE)
+    return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
+  return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
+}
+
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1453,6 +1467,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   }
 
+  // add (adde 0, 0, glue), X -> adde X, 0, glue
+  if (N0->hasOneUse() && isCarryMaterialization(N0))
+    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+                       DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
+                       N0.getOperand(2));
+
+  // add X, (adde 0, 0, glue) -> adde X, 0, glue
+  if (N1->hasOneUse() && isCarryMaterialization(N1))
+    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+                       DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
+                       N1.getOperand(2));
+
   return SDValue();
 }
 
@@ -1496,6 +1522,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
                                    N->getDebugLoc(), MVT::Glue));
   }
 
+  // addc (adde 0, 0, glue), X -> adde X, 0, glue
+  if (N0->hasOneUse() && isCarryMaterialization(N0))
+    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
+                       DAG.getConstant(0, VT), N0.getOperand(2));
+
+  // addc X, (adde 0, 0, glue) -> adde X, 0, glue
+  if (N1->hasOneUse() && isCarryMaterialization(N1))
+    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
+                       DAG.getConstant(0, VT), N1.getOperand(2));
+
   return SDValue();
 }
 
@@ -1506,6 +1542,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
 
+  // If both operands are null we know that carry out will always be false.
+  if (N0C && N0C->isNullValue() && N0 == N1)
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
+                                                             N->getDebugLoc(),
+                                                             MVT::Glue));
+
   // canonicalize constant to RHS
   if (N0C && !N1C)
     return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
@@ -3281,8 +3323,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getUNDEF(VT);
 
     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+      uint64_t ShiftAmt = N1C->getZExtValue();
       SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
-                                       N0.getOperand(0), N1);
+                                       N0.getOperand(0),
+                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
       AddToWorkList(SmallShift.getNode());
       return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
     }
@@ -3688,7 +3732,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
 
   // fold (sext (load x)) -> (sext (truncate (sextload x)))
   // None of the supported targets knows how to perform load and sign extend
-  // in one instruction.  We only perform this transformation on scalars.
+  // on vectors in one instruction.  We only perform this transformation on
+  // scalars.
   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
@@ -3839,7 +3884,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
         // CombineTo deleted the truncate, if needed, but not what's under it.
         AddToWorkList(oye);
       }
-      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -3892,7 +3937,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
 
   // fold (zext (load x)) -> (zext (truncate (zextload x)))
   // None of the supported targets knows how to perform load and vector_zext
-  // in one instruction.  We only perform this transformation on scalar zext.
+  // on vectors in one instruction.  We only perform this transformation on
+  // scalars.
   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
@@ -4066,7 +4112,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
         // CombineTo deleted the truncate, if needed, but not what's under it.
         AddToWorkList(oye);
       }
-      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -4101,7 +4147,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
 
   // fold (aext (load x)) -> (aext (truncate (extload x)))
   // None of the supported targets knows how to perform load and any_ext
-  // in one instruction.  We only perform this transformation on scalars.
+  // on vectors in one instruction.  We only perform this transformation on
+  // scalars.
   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
@@ -4514,7 +4561,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // See if we can simplify the input to this truncate through knowledge that
   // only the low bits are being used.
   // For example "trunc (or (shl x, 8), y)" // -> trunc y
-  // Currenly we only perform this optimization on scalars because vectors
+  // Currently we only perform this optimization on scalars because vectors
   // may have different active low bits.
   if (!VT.isVector()) {
     SDValue Shorter =
@@ -5101,7 +5148,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();
 
   // fold (sint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128)
+  if (N0C && OpVT != MVT::ppcf128 &&
+      // ...but only if the target supports immediate floating-point values
+      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -5123,7 +5172,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();
 
   // fold (uint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128)
+  if (N0C && OpVT != MVT::ppcf128 &&
+      // ...but only if the target supports immediate floating-point values
+      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -5817,8 +5868,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
   // value.
   // TODO: Handle store large -> read small portion.
   // TODO: Handle TRUNCSTORE/LOADEXT
-  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
-      !LD->isVolatile()) {
+  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
       if (PrevST->getBasePtr() == Ptr &&
@@ -6217,6 +6267,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                           ST->isNonTemporal(), OrigAlign);
   }
 
+  // Turn 'store undef, Ptr' -> nothing.
+  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+    return Chain;
+
   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
     // NOTE: If the original store is volatile, this transform must not increase
@@ -6250,8 +6304,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                               Ptr, ST->getPointerInfo(), ST->isVolatile(),
                               ST->isNonTemporal(), ST->getAlignment());
-        } else if (!ST->isVolatile() &&
-                   TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+        }
+
+        if (!ST->isVolatile() &&
+            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
           // Many FP stores are not made apparent until after legalize, e.g. for
           // argument passing.  Since this is so common, custom legalize the
           // 64-bit integer store into two 32-bit stores.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 490b857..3af9482 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -121,10 +122,9 @@ unsigned FastISel::getRegForValue(const Value *V) {
   // only locally. This is because Instructions already have the SSA
   // def-dominates-use requirement enforced.
   DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
-  if (I != FuncInfo.ValueMap.end()) {
-    unsigned Reg = I->second;
-    return Reg;
-  }
+  if (I != FuncInfo.ValueMap.end())
+    return I->second;
+
   unsigned Reg = LocalValueMap[V];
   if (Reg != 0)
     return Reg;
@@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
     Reg =
       getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
   } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-    // Try to emit the constant directly.
-    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+    if (CF->isNullValue()) {
+      Reg = TargetMaterializeFloatZero(CF);
+    } else {
+      // Try to emit the constant directly.
+      Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+    }
 
     if (!Reg) {
       // Try to emit the constant by using an integer constant with a cast.
@@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       return false;
   }
 
+  // Check if the first operand is a constant, and handle it as "ri".  At -O0,
+  // we don't have anything that canonicalizes operand order.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+    if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+      unsigned Op1 = getRegForValue(I->getOperand(1));
+      if (Op1 == 0) return false;
+
+      bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+      unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+                                        Op1IsKill, CI->getZExtValue(),
+                                        VT.getSimpleVT());
+      if (ResultReg == 0) return false;
+
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+
+
   unsigned Op0 = getRegForValue(I->getOperand(0));
-  if (Op0 == 0)
-    // Unhandled operand. Halt "fast" selection and bail.
+  if (Op0 == 0)   // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
 
   // Check if the second operand is a constant and handle it appropriately.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-    unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
-                                     ISDOpcode, Op0, Op0IsKill,
-                                     CI->getZExtValue());
-    if (ResultReg != 0) {
-      // We successfully emitted code for the given LLVM Instruction.
-      UpdateValueMap(I, ResultReg);
-      return true;
+    uint64_t Imm = CI->getZExtValue();
+
+    // Transform "sdiv exact X, 8" -> "sra X, 3".
+    if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+        cast<BinaryOperator>(I)->isExact() &&
+        isPowerOf2_64(Imm)) {
+      Imm = Log2_64(Imm);
+      ISDOpcode = ISD::SRA;
     }
+
+    unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+                                      Op0IsKill, Imm, VT.getSimpleVT());
+    if (ResultReg == 0) return false;
+
+    // We successfully emitted code for the given LLVM Instruction.
+    UpdateValueMap(I, ResultReg);
+    return true;
   }
 
   // Check if the second operand is a constant float.
@@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) {
 }
 
 bool FastISel::SelectCall(const User *I) {
-  const Function *F = cast<CallInst>(I)->getCalledFunction();
+  const CallInst *Call = cast<CallInst>(I);
+
+  // Handle simple inline asms.
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+    // Don't attempt to handle constraints.
+    if (!IA->getConstraintString().empty())
+      return false;
+
+    unsigned ExtraInfo = 0;
+    if (IA->hasSideEffects())
+      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+    if (IA->isAlignStack())
+      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::INLINEASM))
+      .addExternalSymbol(IA->getAsmString().c_str())
+      .addImm(ExtraInfo);
+    return true;
+  }
+
+  const Function *F = Call->getCalledFunction();
   if (!F) return false;
 
   // Handle selected intrinsic function calls.
-  unsigned IID = F->getIntrinsicID();
-  switch (IID) {
+  switch (F->getIntrinsicID()) {
   default: break;
   case Intrinsic::dbg_declare: {
-    const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
     if (!DIVariable(DI->getVariable()).Verify() ||
         !FuncInfo.MF->getMMI().hasDebugInfo())
       return true;
@@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) {
   }
   case Intrinsic::dbg_value: {
     // This form of DBG_VALUE is target-independent.
-    const DbgValueInst *DI = cast<DbgValueInst>(I);
+    const DbgValueInst *DI = cast<DbgValueInst>(Call);
     const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
     const Value *V = DI->getValue();
     if (!V) {
@@ -523,65 +575,58 @@ bool FastISel::SelectCall(const User *I) {
     return true;
   }
   case Intrinsic::eh_exception: {
-    EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      assert(FuncInfo.MBB->isLandingPad() &&
-             "Call to eh.exception not in landing pad!");
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-      UpdateValueMap(I, ResultReg);
-      return true;
-    }
-    }
-    break;
+    EVT VT = TLI.getValueType(Call->getType());
+    if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+      break;
+
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+    UpdateValueMap(Call, ResultReg);
+    return true;
   }
   case Intrinsic::eh_selector: {
-    EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      if (FuncInfo.MBB->isLandingPad())
-        AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
-      else {
+    EVT VT = TLI.getValueType(Call->getType());
+    if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+      break;
+    if (FuncInfo.MBB->isLandingPad())
+      AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+    else {
 #ifndef NDEBUG
-        FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+      FuncInfo.CatchInfoLost.insert(Call);
 #endif
-        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
-        unsigned Reg = TLI.getExceptionSelectorRegister();
-        if (Reg) FuncInfo.MBB->addLiveIn(Reg);
-      }
-
+      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
       unsigned Reg = TLI.getExceptionSelectorRegister();
-      EVT SrcVT = TLI.getPointerTy();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-
-      bool ResultRegIsKill = hasTrivialKill(I);
-
-      // Cast the register to the type of the selector.
-      if (SrcVT.bitsGT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                               ResultReg, ResultRegIsKill);
-      else if (SrcVT.bitsLT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
-      if (ResultReg == 0)
-        // Unhandled operand. Halt "fast" selection and bail.
-        return false;
+      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+    }
 
-      UpdateValueMap(I, ResultReg);
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    EVT SrcVT = TLI.getPointerTy();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+
+    bool ResultRegIsKill = hasTrivialKill(Call);
+
+    // Cast the register to the type of the selector.
+    if (SrcVT.bitsGT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                             ResultReg, ResultRegIsKill);
+    else if (SrcVT.bitsLT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                             ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+    if (ResultReg == 0)
+      // Unhandled operand. Halt "fast" selection and bail.
+      return false;
 
-      return true;
-    }
-    }
-    break;
+    UpdateValueMap(Call, ResultReg);
+
+    return true;
   }
   }
 
@@ -966,59 +1011,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
 unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                 unsigned Op0, bool Op0IsKill,
                                 uint64_t Imm, MVT ImmType) {
+  // If this is a multiply by a power of two, emit this as a shift left.
+  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+    Opcode = ISD::SHL;
+    Imm = Log2_64(Imm);
+  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+    // div x, 8 -> srl x, 3
+    Opcode = ISD::SRL;
+    Imm = Log2_64(Imm);
+  }
+
+  // Horrible hack (to be removed), check to make sure shift amounts are
+  // in-range.
+  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+      Imm >= VT.getSizeInBits())
+    return 0;
+
   // First check if immediate type is legal. If not, we can't use the ri form.
   unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
   if (ResultReg != 0)
     return ResultReg;
   unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
-  if (MaterialReg == 0)
-    return 0;
-  return FastEmit_rr(VT, VT, Opcode,
-                     Op0, Op0IsKill,
-                     MaterialReg, /*Kill=*/true);
-}
-
-/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with a floating-point immediate operand using
-/// FastEmit_rf. If that fails, it materializes the immediate into a register
-/// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
-                                unsigned Op0, bool Op0IsKill,
-                                const ConstantFP *FPImm, MVT ImmType) {
-  // First check if immediate type is legal. If not, we can't use the rf form.
-  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
-  if (ResultReg != 0)
-    return ResultReg;
-
-  // Materialize the constant in a register.
-  unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
   if (MaterialReg == 0) {
-    // If the target doesn't have a way to directly enter a floating-point
-    // value into a register, use an alternate approach.
-    // TODO: The current approach only supports floating-point constants
-    // that can be constructed by conversion from integer values. This should
-    // be replaced by code that creates a load from a constant-pool entry,
-    // which will require some target-specific work.
-    const APFloat &Flt = FPImm->getValueAPF();
-    EVT IntVT = TLI.getPointerTy();
-
-    uint64_t x[2];
-    uint32_t IntBitWidth = IntVT.getSizeInBits();
-    bool isExact;
-    (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
-                             APFloat::rmTowardZero, &isExact);
-    if (!isExact)
-      return 0;
-    APInt IntVal(IntBitWidth, 2, x);
-
-    unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
-                                     ISD::Constant, IntVal.getZExtValue());
-    if (IntegerReg == 0)
-      return 0;
-    MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
-                             ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
-    if (MaterialReg == 0)
-      return 0;
+    // This is a bit ugly/slow, but failing here means falling out of
+    // fast-isel, which would be very slow.
+    const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+                                              VT.getSizeInBits());
+    MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
   }
   return FastEmit_rr(VT, VT, Opcode,
                      Op0, Op0IsKill,
@@ -1099,6 +1118,29 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC,
+                                   unsigned Op0, bool Op0IsKill,
+                                   uint64_t Imm1, uint64_t Imm2) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Imm1)
+      .addImm(Imm2);
+  else {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addImm(Imm1)
+      .addImm(Imm2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill,
@@ -1160,6 +1202,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+                                  const TargetRegisterClass *RC,
+                                  uint64_t Imm1, uint64_t Imm2) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+      .addImm(Imm1).addImm(Imm2);
+  else {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                               unsigned Op0, bool Op0IsKill,
                                               uint32_t Idx) {
@@ -1215,7 +1274,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       // Only handle legal types. Two interesting things to note here. First,
       // by bailing out early, we may leave behind some dead instructions,
       // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
-      // own moves. Second, this check is necessary becuase FastISel doesn't
+      // own moves. Second, this check is necessary because FastISel doesn't
       // use CreateRegs to create registers, so it always creates
       // exactly one register for each non-void instruction.
       EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 2ae3286..d8a5770 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -448,16 +448,30 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
   }
 }
 
-void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
-  for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end();
-       I != E; ++I)
-    if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
-      // Apply the catch info to DestBB.
-      AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+  SmallPtrSet<const BasicBlock*, 4> Visited;
+
+  // The 'eh.selector' call may not be in the direct successor of a basic block,
+  // but could be several successors deeper. If we don't find it, try going one
+  // level further. <rdar://problem/8824861>
+  while (Visited.insert(SuccBB)) {
+    for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+         I != E; ++I)
+      if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+        // Apply the catch info to LPad.
+        AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
 #ifndef NDEBUG
-      if (!FLI.MBBMap[SrcBB]->isLandingPad())
-        FLI.CatchInfoFound.insert(EHSel);
+        if (!FLI.MBBMap[SuccBB]->isLandingPad())
+          FLI.CatchInfoFound.insert(EHSel);
 #endif
-    }
+        return;
+      }
+
+    const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+    if (Br && Br->isUnconditional())
+      SuccBB = Br->getSuccessor(0);
+    else
+      break;
+  }
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f08528f..2b6c56e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -61,10 +61,10 @@ class SelectionDAGLegalize {
 
   // Libcall insertion helpers.
 
-  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
   /// legalized.  We use this to ensure that calls are properly serialized
   /// against each other, including inserted libcalls.
-  SDValue LastCALLSEQ_END;
+  SmallVector<SDValue, 8> LastCALLSEQ;
 
   enum LegalizeAction {
     Legal,      // The target natively supports this operation.
@@ -142,6 +142,9 @@ private:
                              DebugLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+                        unsigned NumOps, bool isSigned, DebugLoc dl);
+
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
   SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -153,6 +156,7 @@ private:
                            RTLIB::Libcall Call_I32,
                            RTLIB::Libcall Call_I64,
                            RTLIB::Libcall Call_I128);
+  void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
   SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -178,6 +182,15 @@ private:
 
   void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+  SDValue getLastCALLSEQ() { return LastCALLSEQ.back();  }
+  void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+  void pushLastCALLSEQ(SDValue s) {
+    LastCALLSEQ.push_back(s);
+  }
+  void popLastCALLSEQ() {
+    LastCALLSEQ.pop_back();
+  }
 };
 }
 
@@ -223,7 +236,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
 }
 
 void SelectionDAGLegalize::LegalizeDAG() {
-  LastCALLSEQ_END = DAG.getEntryNode();
+  pushLastCALLSEQ(DAG.getEntryNode());
 
   // The legalize process is inherently a bottom-up recursive process (users
   // legalize their uses before themselves).  Given infinite stack space, we
@@ -251,14 +264,15 @@ void SelectionDAGLegalize::LegalizeDAG() {
 /// FindCallEndFromCallStart - Given a chained node that is part of a call
 /// sequence, find the CALLSEQ_END node that terminates the call sequence.
 static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
-  // Nested CALLSEQ_START/END constructs aren't yet legal,
-  // but we can DTRT and handle them correctly here.
+  int next_depth = depth;
   if (Node->getOpcode() == ISD::CALLSEQ_START)
-    depth++;
-  else if (Node->getOpcode() == ISD::CALLSEQ_END) {
-    depth--;
-    if (depth == 0)
+    next_depth = depth + 1;
+  if (Node->getOpcode() == ISD::CALLSEQ_END) {
+    assert(depth > 0 && "negative depth!");
+    if (depth == 1)
       return Node;
+    else
+      next_depth = depth - 1;
   }
   if (Node->use_empty())
     return 0;   // No CallSeqEnd
@@ -289,7 +303,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
     SDNode *User = *UI;
     for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
       if (User->getOperand(i) == TheChain)
-        if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+        if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
           return Result;
   }
   return 0;
@@ -786,7 +800,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
       }
     }
   }
-  return SDValue();
+  return SDValue(0, 0);
 }
 
 /// LegalizeOp - We know that the specified value has a legal type, and
@@ -934,11 +948,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     case ISD::BR_JT:
     case ISD::BR_CC:
     case ISD::BRCOND:
-      // Branches tweak the chain to include LastCALLSEQ_END
+      assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+      // Branches tweak the chain to include LastCALLSEQ
       Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
-                            LastCALLSEQ_END);
+                           getLastCALLSEQ());
       Ops[0] = LegalizeOp(Ops[0]);
-      LastCALLSEQ_END = DAG.getEntryNode();
+      setLastCALLSEQ(DAG.getEntryNode());
       break;
     case ISD::SHL:
     case ISD::SRL:
@@ -948,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       // Legalizing shifts/rotates requires adjusting the shift amount
       // to the appropriate width.
       if (!Ops[1].getValueType().isVector())
-        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+                                                      Ops[1]));
       break;
     case ISD::SRL_PARTS:
     case ISD::SRA_PARTS:
@@ -956,7 +972,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       // Legalizing shifts/rotates requires adjusting the shift amount
       // to the appropriate width.
       if (!Ops[2].getValueType().isVector())
-        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+                                                      Ops[2]));
       break;
     }
 
@@ -1024,8 +1041,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     }
     break;
   case ISD::CALLSEQ_START: {
-    static int depth = 0;
     SDNode *CallEnd = FindCallEndFromCallStart(Node);
+    assert(CallEnd && "didn't find CALLSEQ_END!");
 
     // Recursively Legalize all of the inputs of the call end that do not lead
     // to this call start.  This ensures that any libcalls that need be inserted
@@ -1042,9 +1059,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
     // Merge in the last call to ensure that this call starts after the last
     // call ended.
-    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
+    if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
       Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                         Tmp1, LastCALLSEQ_END);
+                         Tmp1, getLastCALLSEQ());
       Tmp1 = LegalizeOp(Tmp1);
     }
 
@@ -1065,29 +1082,28 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     // sequence have been legalized, legalize the call itself.  During this
     // process, no libcalls can/will be inserted, guaranteeing that no calls
     // can overlap.
-
-    SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;
     // Note that we are selecting this call!
-    LastCALLSEQ_END = SDValue(CallEnd, 0);
+    setLastCALLSEQ(SDValue(CallEnd, 0));
 
-    depth++;
     // Legalize the call, starting from the CALLSEQ_END.
-    LegalizeOp(LastCALLSEQ_END);
-    depth--;
-    assert(depth >= 0 && "Un-matched CALLSEQ_START?");
-    if (depth > 0)
-      LastCALLSEQ_END = Saved_LastCALLSEQ_END;
+    LegalizeOp(getLastCALLSEQ());
     return Result;
   }
   case ISD::CALLSEQ_END:
-    // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
-    // will cause this node to be legalized as well as handling libcalls right.
-    if (LastCALLSEQ_END.getNode() != Node) {
-      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
-      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
-      assert(I != LegalizedNodes.end() &&
-             "Legalizing the call start should have legalized this node!");
-      return I->second;
+    {
+      SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+      // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
+      // will cause this node to be legalized as well as handling libcalls right.
+      if (getLastCALLSEQ().getNode() != Node) {
+        LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+        DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+        assert(I != LegalizedNodes.end() &&
+               "Legalizing the call start should have legalized this node!");
+        return I->second;
+      }
+
+      pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
     }
 
     // Otherwise, the call start has been legalized and everything is going
@@ -1116,6 +1132,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       }
     }
     // This finishes up call legalization.
+    popLastCALLSEQ();
+
     // If the CALLSEQ_END node has a flag, remember that we legalized it.
     AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
     if (Node->getNumValues() == 2)
@@ -2035,9 +2053,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
     return DAG.getRoot();
 
   // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments 
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+                                            const SDValue *Ops, unsigned NumOps,
+                                            bool isSigned, DebugLoc dl) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumOps);
+  
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    Entry.Node = Ops[i];
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+  
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue,SDValue> CallInfo =
+  TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+                  false, 0, TLI.getLibcallCallingConv(LC), false,
+                  /*isReturnValueUsed=*/true,
+                  Callee, Args, DAG, dl);
+  
+  // Legalize the call sequence, starting with the chain.  This will advance
   // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
   // was added by LowerCallTo (guaranteeing proper serialization of calls).
   LegalizeOp(CallInfo.second);
+
   return CallInfo.first;
 }
 
@@ -2072,7 +2124,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
                     Callee, Args, DAG, Node->getDebugLoc());
 
   // Legalize the call sequence, starting with the chain.  This will advance
-  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
   // was added by LowerCallTo (guaranteeing proper serialization of calls).
   LegalizeOp(CallInfo.second);
   return CallInfo;
@@ -2112,6 +2164,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
   return ExpandLibCall(LC, Node, isSigned);
 }
 
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+                                     const TargetLowering &TLI) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+  }
+
+  return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+  unsigned OtherOpcode = 0;
+  if (isSigned)
+    OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+  else
+    OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+  SDValue Op0 = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == Node)
+      continue;
+    if (User->getOpcode() == OtherOpcode &&
+        User->getOperand(0) == Op0 &&
+        User->getOperand(1) == Op1)
+      return true;
+  }
+  return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+                                          SmallVectorImpl<SDValue> &Results) {
+  unsigned Opcode = Node->getOpcode();
+  bool isSigned = Opcode == ISD::SDIVREM;
+
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+  }
+
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  EVT RetVT = Node->getValueType(0);
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the return address of the remainder.
+  SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+  Entry.Node = FIPtr;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.isSExt = isSigned;
+  Entry.isZExt = !isSigned;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  DebugLoc dl = Node->getDebugLoc();
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                    /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+
+  // Remainder is loaded back from the stack frame.
+  SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+                            MachinePointerInfo(), false, false, 0);
+  Results.push_back(CallInfo.first);
+  Results.push_back(Rem);
+}
+
 /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
 /// INT_TO_FP operation of the specified operand when the target requests that
 /// we expand it.  At this point, we know that the result and operand types are
@@ -2759,7 +2918,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   }
   case ISD::FP_ROUND_INREG: {
     // The only way we can lower this is to turn it into a TRUNCSTORE,
-    // EXTLOAD pair, targetting a temporary location (a stack slot).
+    // EXTLOAD pair, targeting a temporary location (a stack slot).
 
     // NOTE: there is a choice here between constantly creating new stack
     // slots and always reusing the same one.  We currently always create
@@ -3085,24 +3244,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
     Tmp2 = Node->getOperand(0);
     Tmp3 = Node->getOperand(1);
-    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+        (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         UseDivRem(Node, isSigned, false))) {
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
     } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
       // X % Y -> X-X/Y*Y
       Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
       Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
       Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
-    } else if (isSigned) {
+    } else if (isSigned)
       Tmp1 = ExpandIntLibCall(Node, true,
                               RTLIB::SREM_I8,
                               RTLIB::SREM_I16, RTLIB::SREM_I32,
                               RTLIB::SREM_I64, RTLIB::SREM_I128);
-    } else {
+    else
       Tmp1 = ExpandIntLibCall(Node, false,
                               RTLIB::UREM_I8,
                               RTLIB::UREM_I16, RTLIB::UREM_I32,
                               RTLIB::UREM_I64, RTLIB::UREM_I128);
-    }
     Results.push_back(Tmp1);
     break;
   }
@@ -3112,7 +3272,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
     EVT VT = Node->getValueType(0);
     SDVTList VTs = DAG.getVTList(VT, VT);
-    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+        (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         UseDivRem(Node, isSigned, true)))
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
                          Node->getOperand(1));
     else if (isSigned)
@@ -3141,6 +3303,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(Tmp1.getValue(1));
     break;
   }
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    // Expand into divrem libcall
+    ExpandDivRemLibCall(Node, Results);
+    break;
   case ISD::MUL: {
     EVT VT = Node->getValueType(0);
     SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3225,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   case ISD::UMULO:
   case ISD::SMULO: {
     EVT VT = Node->getValueType(0);
+    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
     SDValue LHS = Node->getOperand(0);
     SDValue RHS = Node->getOperand(1);
     SDValue BottomHalf;
@@ -3242,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       TopHalf = BottomHalf.getValue(1);
     } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
                                                  VT.getSizeInBits() * 2))) {
-      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
       LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
       RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
       Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3255,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       // have a libcall big enough.
       // Also, we can fall back to a division in some cases, but that's a big
       // performance hit in the general case.
-      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
       RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
       if (WideVT == MVT::i16)
         LC = RTLIB::MUL_I16;
@@ -3266,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       else if (WideVT == MVT::i128)
         LC = RTLIB::MUL_I128;
       assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
-      LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
-      RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
-
-      SDValue Ret = ExpandLibCall(LC, Node, isSigned);
-      BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
-      TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
-                       DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
-      TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
+      
+      // The high part is obtained by SRA'ing all but one of the bits of low 
+      // part.
+      unsigned LoSize = VT.getSizeInBits();
+      SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+                                DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+      SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+                                DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+      // Here we're passing the 2 arguments explicitly as 4 arguments that are
+      // pre-lowered to the correct types. This all depends upon WideVT not
+      // being a legal type for the architecture and thus has to be split to
+      // two arguments.
+      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+      SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+      BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+                               DAG.getIntPtrConstant(0));
+      TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+                            DAG.getIntPtrConstant(1));
     }
+    
     if (isSigned) {
       Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
                              TLI.getShiftAmountTy(BottomHalf.getValueType()));
@@ -3409,7 +3587,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 
     LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
                           Tmp2, Tmp3, Tmp4, dl);
-    LastCALLSEQ_END = DAG.getEntryNode();
+    assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+    setLastCALLSEQ(DAG.getEntryNode());
 
     assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
     Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f0752df..935aab0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1051,8 +1051,6 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
   case ISD::UADDO:
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
-  case ISD::UMULO:
-  case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1428,9 +1426,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
       HiOps[2] = Lo.getValue(1);
       Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
     }
-    return;    
+    return;
   }
-  
+
   if (N->getOpcode() == ISD::ADD) {
     Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
     Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
@@ -2128,31 +2126,6 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Ofl);
 }
 
-void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
-                                              SDValue &Lo, SDValue &Hi) {
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  DebugLoc dl = N->getDebugLoc();
-  EVT VT = N->getValueType(0);
-  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
-  // Expand the result by simply replacing it with the equivalent
-  // non-overflow-checking operation.
-  SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
-  SplitInteger(Ret, Lo, Hi);
-  
-  // Now calculate overflow.
-  SDValue Ofl;
-  if (N->getOpcode() == ISD::UMULO)
-    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
-                       DAG.getConstant(0, VT), ISD::SETNE);
-  else {
-    SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
-    Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
-    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
-  }
-  ReplaceValueWith(SDValue(N, 1), Ofl);
-}
-
 void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3f81bbb..5409b88 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -348,7 +348,6 @@ private:
 
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
-  void ExpandIntRes_UMULSMULO	      (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, unsigned Amt,
                              SDValue &Lo, SDValue &Hi);
@@ -523,6 +522,7 @@ private:
   SDValue ScalarizeVecRes_BITCAST(SDNode *N);
   SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
   SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
   SDValue ScalarizeVecRes_FPOWI(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -566,7 +566,6 @@ private:
   void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 167dbe0..5d0f923 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -58,6 +58,9 @@ class VectorLegalizer {
   SDValue UnrollVSETCC(SDValue Op);
   // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
   // isn't legal.
+  // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  // SINT_TO_FLOAT and SHR on vectors isn't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
   SDValue ExpandFNEG(SDValue Op);
   // Implements vector promotion; this is essentially just bitcasting the
   // operands to a different type and bitcasting the result back to the
@@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     // FALL THROUGH
   }
   case TargetLowering::Expand:
-    if (Node->getOpcode() == ISD::FNEG)
+    if (Node->getOpcode() == ISD::UINT_TO_FP)
+      Result = ExpandUINT_TO_FLOAT(Op);
+    else if (Node->getOpcode() == ISD::FNEG)
       Result = ExpandFNEG(Op);
     else if (Node->getOpcode() == ISD::VSETCC)
       Result = UnrollVSETCC(Op);
@@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
   return DAG.getNode(ISD::BITCAST, dl, VT, Op);
 }
 
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+
+
+  EVT VT = Op.getOperand(0).getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Make sure that the SINT_TO_FP and SRL instructions are available.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+      !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+      return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+  assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+      "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+  unsigned BW = SVT.getSizeInBits();
+  SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+  // Constants to clear the upper part of the word.
+  // Notice that we can also use SHL+SHR, but using a constant is slightly
+  // faster on x86.
+  uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
+  SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+  // Two to the power of half-word-size.
+  SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
+
+  // Clear upper part of LO, lower HI
+  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert hi and lo to floats
+  // Convert the hi part back to the upper values
+  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+          fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+  // Add the two halves
+  return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+
 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
     SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 182f8fc..0b4dd35 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
   case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND:          R = ScalarizeVecRes_FP_ROUND(N); break;
   case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
   case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
@@ -63,27 +64,33 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
   case ISD::VSETCC:            R = ScalarizeVecRes_VSETCC(N); break;
 
+  case ISD::ANY_EXTEND:
   case ISD::CTLZ:
   case ISD::CTPOP:
   case ISD::CTTZ:
   case ISD::FABS:
+  case ISD::FCEIL:
   case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
   case ISD::FNEG:
+  case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::FRINT:
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
-  case ISD::FFLOOR:
-  case ISD::FCEIL:
-  case ISD::FRINT:
-  case ISD::FNEARBYINT:
-  case ISD::UINT_TO_FP:
+  case ISD::SIGN_EXTEND:
   case ISD::SINT_TO_FP:
   case ISD::TRUNCATE:
-  case ISD::SIGN_EXTEND:
+  case ISD::UINT_TO_FP:
   case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND:
     R = ScalarizeVecRes_UnaryOp(N);
     break;
 
@@ -145,6 +152,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
                      N->getOperand(0), N->getOperand(1));
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+                     NewVT, Op, N->getOperand(1));
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
   SDValue Op = GetScalarizedVector(N->getOperand(0));
   return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
@@ -405,11 +419,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
-
   case ISD::BITCAST:           SplitVecRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
-  case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
   case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
   case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
@@ -427,32 +439,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;
 
-  case ISD::CTTZ:
+  case ISD::ANY_EXTEND:
+  case ISD::CONVERT_RNDSAT:
   case ISD::CTLZ:
   case ISD::CTPOP:
-  case ISD::FNEG:
+  case ISD::CTTZ:
   case ISD::FABS:
-  case ISD::FSQRT:
-  case ISD::FSIN:
+  case ISD::FCEIL:
   case ISD::FCOS:
-  case ISD::FTRUNC:
+  case ISD::FEXP:
+  case ISD::FEXP2:
   case ISD::FFLOOR:
-  case ISD::FCEIL:
-  case ISD::FRINT:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
   case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::FRINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::SIGN_EXTEND:
   case ISD::SINT_TO_FP:
-  case ISD::UINT_TO_FP:
   case ISD::TRUNCATE:
-  case ISD::SIGN_EXTEND:
+  case ISD::UINT_TO_FP:
   case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND:
-  case ISD::FEXP:
-  case ISD::FEXP2:
-  case ISD::FLOG:
-  case ISD::FLOG2:
-  case ISD::FLOG10:
     SplitVecRes_UnaryOp(N, Lo, Hi);
     break;
 
@@ -587,60 +602,6 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
 }
 
-void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
-                                                  SDValue &Hi) {
-  EVT LoVT, HiVT;
-  DebugLoc dl = N->getDebugLoc();
-  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-
-  SDValue DTyOpLo =  DAG.getValueType(LoVT);
-  SDValue DTyOpHi =  DAG.getValueType(HiVT);
-
-  SDValue RndOp = N->getOperand(3);
-  SDValue SatOp = N->getOperand(4);
-  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
-
-  // Split the input.
-  SDValue VLo, VHi;
-  EVT InVT = N->getOperand(0).getValueType();
-  switch (getTypeAction(InVT)) {
-  default: llvm_unreachable("Unexpected type action!");
-  case Legal: {
-    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
-                                 LoVT.getVectorNumElements());
-    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
-                      DAG.getIntPtrConstant(0));
-    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
-                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
-    break;
-  }
-  case SplitVector:
-    GetSplitVector(N->getOperand(0), VLo, VHi);
-    break;
-  case WidenVector: {
-    // If the result needs to be split and the input needs to be widened,
-    // the two types must have different lengths. Use the widened result
-    // and extract from it to do the split.
-    SDValue InOp = GetWidenedVector(N->getOperand(0));
-    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
-                                 LoVT.getVectorNumElements());
-    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
-                      DAG.getIntPtrConstant(0));
-    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
-                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
-    break;
-  }
-  }
-
-  SDValue STyOpLo =  DAG.getValueType(VLo.getValueType());
-  SDValue STyOpHi =  DAG.getValueType(VHi.getValueType());
-
-  Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
-                            CvtCode);
-  Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
-                            CvtCode);
-}
-
 void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
@@ -840,8 +801,25 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
   }
   }
 
-  Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
-  Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+  if (N->getOpcode() == ISD::FP_ROUND) {
+    Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+    Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+  } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+    SDValue DTyOpLo = DAG.getValueType(LoVT);
+    SDValue DTyOpHi = DAG.getValueType(HiVT);
+    SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+    SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+    SDValue RndOp = N->getOperand(3);
+    SDValue SatOp = N->getOperand(4);
+    ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+    Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+                              CvtCode);
+    Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+                              CvtCode);
+  } else {
+    Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+    Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+  }
 }
 
 void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
@@ -989,11 +967,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::CTTZ:
     case ISD::CTLZ:
     case ISD::CTPOP:
+    case ISD::FP_EXTEND:
     case ISD::FP_TO_SINT:
     case ISD::FP_TO_UINT:
     case ISD::SINT_TO_FP:
     case ISD::UINT_TO_FP:
-    case ISD::FP_EXTEND:
     case ISD::FTRUNC:
     case ISD::TRUNCATE:
     case ISD::SIGN_EXTEND:
@@ -1270,15 +1248,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_Shift(N);
     break;
 
+  case ISD::ANY_EXTEND:
+  case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::SIGN_EXTEND:
   case ISD::SINT_TO_FP:
-  case ISD::UINT_TO_FP:
   case ISD::TRUNCATE:
-  case ISD::SIGN_EXTEND:
+  case ISD::UINT_TO_FP:
   case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND:
     Res = WidenVecRes_Convert(N);
     break;
 
@@ -1286,15 +1265,20 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CTPOP:
   case ISD::CTTZ:
   case ISD::FABS:
+  case ISD::FCEIL:
   case ISD::FCOS:
-  case ISD::FNEG:
-  case ISD::FSIN:
-  case ISD::FSQRT:
   case ISD::FEXP:
   case ISD::FEXP2:
+  case ISD::FFLOOR:
   case ISD::FLOG:
-  case ISD::FLOG2:
   case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FRINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
     Res = WidenVecRes_Unary(N);
     break;
   }
@@ -2004,7 +1988,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
 
-  case ISD::FP_ROUND:
+  case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index e3da208..7b560d1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -570,13 +570,20 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
           TRI->getMinimalPhysRegClass(Reg, VT);
         const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
 
-        // If cross copy register class is null, then it must be possible copy
-        // the value directly. Do not try duplicate the def.
+        // If cross copy register class is the same as RC, then it must be
+        // possible copy the value directly. Do not try duplicate the def.
+        // If cross copy register class is not the same as RC, then it's
+        // possible to copy the value but it require cross register class copies
+        // and it is expensive.
+        // If cross copy register class is null, then it's not possible to copy
+        // the value at all.
         SUnit *NewDef = 0;
-        if (DestRC)
+        if (DestRC != RC) {
           NewDef = CopyAndMoveSuccessors(LRDef);
-        else
-          DestRC = RC;
+          if (!DestRC && !NewDef)
+            report_fatal_error("Can't handle live physical "
+                               "register dependency!");
+        }
         if (!NewDef) {
           // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 0b548b2..88bd450 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -70,6 +70,50 @@ static cl::opt<bool> DisableSchedCycles(
   "disable-sched-cycles", cl::Hidden, cl::init(false),
   cl::desc("Disable cycle-level precision during preRA scheduling"));
 
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+  cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+  "disable-sched-live-uses", cl::Hidden, cl::init(true),
+  cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+  cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+  cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+  "disable-sched-stalls", cl::Hidden, cl::init(true),
+  cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+  "disable-sched-critical-path", cl::Hidden, cl::init(false),
+  cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+  "disable-sched-height", cl::Hidden, cl::init(false),
+  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+  "max-sched-reorder", cl::Hidden, cl::init(6),
+  cl::desc("Number of instructions to allow ahead of the critical path "
+           "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+  "sched-avg-ipc", cl::Hidden, cl::init(1),
+  cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+  // For sched=list-ilp, Count the number of times each factor comes into play.
+  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+         FactStatic, FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
 namespace {
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -103,6 +147,10 @@ private:
   /// MinAvailableCycle - Cycle of the soonest available instruction.
   unsigned MinAvailableCycle;
 
+  /// IssueCount - Count instructions issued in this cycle
+  /// Currently valid only for bottom-up scheduling.
+  unsigned IssueCount;
+
   /// LiveRegDefs - A set of physical registers and their definition
   /// that are "live". These nodes must be scheduled before any other nodes that
   /// modifies the registers can be scheduled.
@@ -234,8 +282,14 @@ void ScheduleDAGRRList::Schedule() {
   DEBUG(dbgs()
         << "********** List Scheduling BB#" << BB->getNumber()
         << " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+  for (int i = 0; i < NumFactors; ++i) {
+    FactorCount[i] = 0;
+  }
+#endif //!NDEBUG
 
   CurCycle = 0;
+  IssueCount = 0;
   MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -258,6 +312,11 @@ void ScheduleDAGRRList::Schedule() {
   else
     ListScheduleTopDown();
 
+#ifndef NDEBUG
+  for (int i = 0; i < NumFactors; ++i) {
+    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+  }
+#endif // !NDEBUG
   AvailableQueue->releaseState();
 }
 
@@ -295,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
     if (Height < MinAvailableCycle)
       MinAvailableCycle = Height;
 
-    if (isReady(SU)) {
+    if (isReady(PredSU)) {
       AvailableQueue->push(PredSU);
     }
     // CapturePred and others may have left the node in the pending queue, avoid
@@ -383,6 +442,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
   if (NextCycle <= CurCycle)
     return;
 
+  IssueCount = 0;
   AvailableQueue->setCurCycle(NextCycle);
   if (!HazardRec->isEnabled()) {
     // Bypass lots of virtual calls in case of long latency.
@@ -407,6 +467,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
   if (DisableSchedCycles)
     return;
 
+  // FIXME: Nodes such as CopyFromReg probably should not advance the current
+  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+  // has predecessors the cycle will be advanced when they are scheduled.
+  // But given the crude nature of modeling latency though such nodes, we
+  // currently need to treat these nodes like real instructions.
+  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
   unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
 
   // Bump CurCycle to account for latency. We assume the latency of other
@@ -477,6 +544,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
   }
 }
 
+static void resetVRegCycle(SUnit *SU);
+
 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
@@ -486,12 +555,13 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
 
 #ifndef NDEBUG
   if (CurCycle < SU->getHeight())
-    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
+    DEBUG(dbgs() << "   Height [" << SU->getHeight()
+          << "] pipeline stall!\n");
 #endif
 
   // FIXME: Do not modify node height. It may interfere with
   // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
-  // node it's ready cycle can aid heuristics, and after scheduling it can
+  // node its ready cycle can aid heuristics, and after scheduling it can
   // indicate the scheduled cycle.
   SU->setHeightToAtLeast(CurCycle);
 
@@ -502,6 +572,12 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
 
   AvailableQueue->ScheduledNode(SU);
 
+  // If HazardRec is disabled, and each inst counts as one cycle, then
+  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+  // PendingQueue for schedulers that implement HasReadyFilter.
+  if (!HazardRec->isEnabled() && AvgIPC < 2)
+    AdvanceToCycle(CurCycle + 1);
+
   // Update liveness of predecessors before successors to avoid treating a
   // two-address node as a live range def.
   ReleasePredecessors(SU);
@@ -518,16 +594,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
     }
   }
 
+  resetVRegCycle(SU);
+
   SU->isScheduled = true;
 
   // Conditions under which the scheduler should eagerly advance the cycle:
   // (1) No available instructions
   // (2) All pipelines full, so available instructions must have hazards.
   //
-  // If HazardRec is disabled, count each inst as one cycle.
-  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
-      || AvailableQueue->empty())
-    AdvanceToCycle(CurCycle + 1);
+  // If HazardRec is disabled, the cycle was pre-advanced before calling
+  // ReleasePredecessors. In that case, IssueCount should remain 0.
+  //
+  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+  if (HazardRec->isEnabled() || AvgIPC > 1) {
+    if (SU->getNode() && SU->getNode()->isMachineOpcode())
+      ++IssueCount;
+    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+      AdvanceToCycle(CurCycle + 1);
+  }
 }
 
 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -872,6 +957,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
       AddPred(SuccSU, D);
       DelDeps.push_back(std::make_pair(SuccSU, *I));
     }
+    else {
+      // Avoid scheduling the def-side copy before other successors. Otherwise
+      // we could introduce another physreg interference on the copy and
+      // continue inserting copies indefinitely.
+      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
+             /*Reg=*/0, /*isNormalMemory=*/false,
+             /*isMustAlias=*/false, /*isArtificial=*/true);
+      AddPred(SuccSU, D);
+    }
   }
   for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
@@ -1077,13 +1171,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
       TRI->getMinimalPhysRegClass(Reg, VT);
     const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
 
-    // If cross copy register class is null, then it must be possible copy
-    // the value directly. Do not try duplicate the def.
+    // If cross copy register class is the same as RC, then it must be possible
+    // copy the value directly. Do not try duplicate the def.
+    // If cross copy register class is not the same as RC, then it's possible to
+    // copy the value but it require cross register class copies and it is
+    // expensive.
+    // If cross copy register class is null, then it's not possible to copy
+    // the value at all.
     SUnit *NewDef = 0;
-    if (DestRC)
+    if (DestRC != RC) {
       NewDef = CopyAndMoveSuccessors(LRDef);
-    else
-      DestRC = RC;
+      if (!DestRC && !NewDef)
+        report_fatal_error("Can't handle live physical register dependency!");
+    }
     if (!NewDef) {
       // Issue copies, these can be expensive cross register class copies.
       SmallVector<SUnit*, 2> Copies;
@@ -1139,7 +1239,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
   // priority. If it is not ready put it back.  Schedule the node.
   Sequence.reserve(SUnits.size());
   while (!AvailableQueue->empty()) {
-    DEBUG(dbgs() << "\n*** Examining Available\n";
+    DEBUG(dbgs() << "\nExamining Available:\n";
           AvailableQueue->dump(this));
 
     // Pick the best node to schedule taking all constraints into
@@ -1318,7 +1418,7 @@ struct src_ls_rr_sort : public queue_sort {
 struct hybrid_ls_rr_sort : public queue_sort {
   enum {
     IsBottomUp = true,
-    HasReadyFilter = true
+    HasReadyFilter = false
   };
 
   RegReductionPQBase *SPQ;
@@ -1337,7 +1437,7 @@ struct hybrid_ls_rr_sort : public queue_sort {
 struct ilp_ls_rr_sort : public queue_sort {
   enum {
     IsBottomUp = true,
-    HasReadyFilter = true
+    HasReadyFilter = false
   };
 
   RegReductionPQBase *SPQ;
@@ -1395,7 +1495,7 @@ public:
       std::fill(RegPressure.begin(), RegPressure.end(), 0);
       for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
              E = TRI->regclass_end(); I != E; ++I)
-        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
     }
   }
 
@@ -1422,6 +1522,8 @@ public:
   unsigned getNodePriority(const SUnit *SU) const;
 
   unsigned getNodeOrdering(const SUnit *SU) const {
+    if (!SU->getNode()) return 0;
+
     return scheduleDAG->DAG->GetOrdering(SU->getNode());
   }
 
@@ -1450,7 +1552,9 @@ public:
 
   bool HighRegPressure(const SUnit *SU) const;
 
-  bool MayReduceRegPressure(SUnit *SU);
+  bool MayReduceRegPressure(SUnit *SU) const;
+
+  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
 
   void ScheduledNode(SUnit *SU);
 
@@ -1538,6 +1642,20 @@ ILPBURRPriorityQueue;
 //           Static Node Priority for Register Pressure Reduction
 //===----------------------------------------------------------------------===//
 
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+  bool LSchedLow = left->isScheduleLow;
+  bool RSchedLow = right->isScheduleLow;
+  if (LSchedLow != RSchedLow)
+    return LSchedLow < RSchedLow ? 1 : -1;
+  return 0;
+}
+
 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
 /// Smaller number is the higher priority.
 static unsigned
@@ -1576,17 +1694,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
     CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
 }
 
-void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
-  SUnits = &sunits;
-  // Add pseudo dependency edges for two-address nodes.
-  AddPseudoTwoAddrDeps();
-  // Reroute edges to nodes with multiple uses.
-  if (!TracksRegPressure)
-    PrescheduleNodesWithMultipleUses();
-  // Calculate node priorities.
-  CalculateSethiUllmanNumbers();
-}
-
 void RegReductionPQBase::addNode(const SUnit *SU) {
   unsigned SUSize = SethiUllmanNumbers.size();
   if (SUnits->size() > SUSize)
@@ -1625,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
     // If SU does not have a register def, schedule it close to its uses
     // because it does not lengthen any live ranges.
     return 0;
+#if 1
   return SethiUllmanNumbers[SU->NodeNum];
+#else
+  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+  if (SU->isCallOp) {
+    // FIXME: This assumes all of the defs are used as call operands.
+    int NP = (int)Priority - SU->getNode()->getNumValues();
+    return (NP > 0) ? NP : 0;
+  }
+  return Priority;
+#endif
 }
 
 //===----------------------------------------------------------------------===//
@@ -1670,7 +1787,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
   return false;
 }
 
-bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
   const SDNode *N = SU->getNode();
 
   if (!N->isMachineOpcode() || !SU->NumSuccs)
@@ -1688,10 +1805,60 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
   return false;
 }
 
+// Compute the register pressure contribution by this instruction by count up
+// for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+  LiveUses = 0;
+  int PDiff = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      if (PredSU->getNode()->isMachineOpcode())
+        ++LiveUses;
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (RegPressure[RCId] >= RegLimit[RCId])
+        ++PDiff;
+    }
+  }
+  const SDNode *N = SU->getNode();
+
+  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+    return PDiff;
+
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    EVT VT = N->getValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      --PDiff;
+  }
+  return PDiff;
+}
+
 void RegReductionPQBase::ScheduledNode(SUnit *SU) {
   if (!TracksRegPressure)
     return;
 
+  if (!SU->getNode())
+    return;
+
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     if (I->isCtrl())
@@ -1758,6 +1925,8 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
     return;
 
   const SDNode *N = SU->getNode();
+  if (!N) return;
+
   if (!N->isMachineOpcode()) {
     if (N->getOpcode() != ISD::CopyToReg)
       return;
@@ -1871,7 +2040,29 @@ static unsigned calcMaxScratches(const SUnit *SU) {
   return Scratches;
 }
 
-/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *PredSU = I->getSUnit();
+    if (PredSU->getNode() &&
+        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
 /// CopyToReg to a virtual register. This SU def is probably a liveout and
 /// it has no other use. It should be scheduled closer to the terminator.
 static bool hasOnlyLiveOutUses(const SUnit *SU) {
@@ -1893,20 +2084,67 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
   return RetVal;
 }
 
-/// UnitsSharePred - Return true if the two scheduling units share a common
-/// data predecessor.
-static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
-  SmallSet<const SUnit*, 4> Preds;
-  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+  if (DisableSchedVRegCycle)
+    return;
+
+  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+    return;
+
+  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+  SU->isVRegCycle = true;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    I->getSUnit()->isVRegCycle = true;
+  }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+  if (!SU->isVRegCycle)
+    return;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
        I != E; ++I) {
     if (I->isCtrl()) continue;  // ignore chain preds
-    Preds.insert(I->getSUnit());
+    SUnit *PredSU = I->getSUnit();
+    if (PredSU->isVRegCycle) {
+      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+             "VRegCycle def must be CopyFromReg");
+      I->getSUnit()->isVRegCycle = 0;
+    }
   }
-  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+  // If this SU also defines the VReg, don't hoist it as a "use".
+  if (SU->isVRegCycle)
+    return false;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
        I != E; ++I) {
     if (I->isCtrl()) continue;  // ignore chain preds
-    if (Preds.count(I->getSUnit()))
+    if (I->getSUnit()->isVRegCycle &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
       return true;
+    }
   }
   return false;
 }
@@ -1926,23 +2164,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
 // Return 0 if latency-based priority is equivalent.
 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                             RegReductionPQBase *SPQ) {
-  // If the two nodes share an operand and one of them has a single
-  // use that is a live out copy, favor the one that is live out. Otherwise
-  // it will be difficult to eliminate the copy if the instruction is a
-  // loop induction variable update. e.g.
-  // BB:
-  // sub r1, r3, #1
-  // str r0, [r2, r3]
-  // mov r3, r1
-  // cmp
-  // bne BB
-  bool SharePred = UnitsSharePred(left, right);
-  // FIXME: Only adjust if BB is a loop back edge.
-  // FIXME: What's the cost of a copy?
-  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
-  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
-  int LHeight = (int)left->getHeight() - LBonus;
-  int RHeight = (int)right->getHeight() - RBonus;
+  // Scheduling an instruction that uses a VReg whose postincrement has not yet
+  // been scheduled will induce a copy. Model this as an extra cycle of latency.
+  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+  int LHeight = (int)left->getHeight() + LPenalty;
+  int RHeight = (int)right->getHeight() + RPenalty;
 
   bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
     BUHasStall(left, LHeight, SPQ);
@@ -1953,45 +2180,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
   // If scheduling either one of the node will cause a pipeline stall, sort
   // them according to their height.
   if (LStall) {
-    if (!RStall)
+    if (!RStall) {
+      DEBUG(++FactorCount[FactStall]);
       return 1;
-    if (LHeight != RHeight)
+    }
+    if (LHeight != RHeight) {
+      DEBUG(++FactorCount[FactStall]);
       return LHeight > RHeight ? 1 : -1;
-  } else if (RStall)
+    }
+  } else if (RStall) {
+    DEBUG(++FactorCount[FactStall]);
     return -1;
+  }
 
   // If either node is scheduling for latency, sort them by height/depth
   // and latency.
   if (!checkPref || (left->SchedulingPref == Sched::Latency ||
                      right->SchedulingPref == Sched::Latency)) {
     if (DisableSchedCycles) {
-      if (LHeight != RHeight)
+      if (LHeight != RHeight) {
+        DEBUG(++FactorCount[FactHeight]);
         return LHeight > RHeight ? 1 : -1;
+      }
     }
     else {
       // If neither instruction stalls (!LStall && !RStall) then
-      // it's height is already covered so only its depth matters. We also reach
+      // its height is already covered so only its depth matters. We also reach
       // this if both stall but have the same height.
-      unsigned LDepth = left->getDepth();
-      unsigned RDepth = right->getDepth();
+      int LDepth = left->getDepth() - LPenalty;
+      int RDepth = right->getDepth() - RPenalty;
       if (LDepth != RDepth) {
+        DEBUG(++FactorCount[FactDepth]);
         DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
               << ") depth " << LDepth << " vs SU (" << right->NodeNum
               << ") depth " << RDepth << "\n");
         return LDepth < RDepth ? 1 : -1;
       }
     }
-    if (left->Latency != right->Latency)
+    if (left->Latency != right->Latency) {
+      DEBUG(++FactorCount[FactOther]);
       return left->Latency > right->Latency ? 1 : -1;
+    }
   }
   return 0;
 }
 
 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+  // Schedule physical register definitions close to their use. This is
+  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+  // long as shortening physreg live ranges is generally good, we can defer
+  // creating a subtarget hook.
+  if (!DisableSchedPhysRegJoin) {
+    bool LHasPhysReg = left->hasPhysRegDefs;
+    bool RHasPhysReg = right->hasPhysRegDefs;
+    if (LHasPhysReg != RHasPhysReg) {
+      DEBUG(++FactorCount[FactRegUses]);
+      #ifndef NDEBUG
+      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+      #endif
+      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
+            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+            << PhysRegMsg[RHasPhysReg] << "\n");
+      return LHasPhysReg < RHasPhysReg;
+    }
+  }
+
+  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
-  if (LPriority != RPriority)
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allows it if it would reduce register pressure.
+  if (left->isCall && right->isCallOp) {
+    unsigned RNumVals = right->getNode()->getNumValues();
+    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+  }
+  if (right->isCall && left->isCallOp) {
+    unsigned LNumVals = left->getNode()->getNumValues();
+    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+  }
+
+  if (LPriority != RPriority) {
+    DEBUG(++FactorCount[FactStatic]);
     return LPriority > RPriority;
+  }
+
+  // One or both of the nodes are calls and their sethi-ullman numbers are the
+  // same, then keep source order.
+  if (left->isCall || right->isCall) {
+    unsigned LOrder = SPQ->getNodeOrdering(left);
+    unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer an ordering where the lower the non-zero order number, the higher
+    // the preference.
+    if ((LOrder || ROrder) && LOrder != ROrder)
+      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+  }
 
   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
   // e.g.
@@ -2012,40 +2296,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
   // This creates more short live intervals.
   unsigned LDist = closestSucc(left);
   unsigned RDist = closestSucc(right);
-  if (LDist != RDist)
+  if (LDist != RDist) {
+    DEBUG(++FactorCount[FactOther]);
     return LDist < RDist;
+  }
 
   // How many registers becomes live when the node is scheduled.
   unsigned LScratch = calcMaxScratches(left);
   unsigned RScratch = calcMaxScratches(right);
-  if (LScratch != RScratch)
+  if (LScratch != RScratch) {
+    DEBUG(++FactorCount[FactOther]);
     return LScratch > RScratch;
+  }
+
+  // Comparing latency against a call makes little sense unless the node
+  // is register pressure-neutral.
+  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+    return (left->NodeQueueId > right->NodeQueueId);
 
-  if (!DisableSchedCycles) {
+  // Do not compare latencies when one or both of the nodes are calls.
+  if (!DisableSchedCycles &&
+      !(left->isCall || right->isCall)) {
     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
     if (result != 0)
       return result > 0;
   }
   else {
-    if (left->getHeight() != right->getHeight())
+    if (left->getHeight() != right->getHeight()) {
+      DEBUG(++FactorCount[FactHeight]);
       return left->getHeight() > right->getHeight();
+    }
 
-    if (left->getDepth() != right->getDepth())
+    if (left->getDepth() != right->getDepth()) {
+      DEBUG(++FactorCount[FactDepth]);
       return left->getDepth() < right->getDepth();
+    }
   }
 
   assert(left->NodeQueueId && right->NodeQueueId &&
          "NodeQueueId cannot be zero");
+  DEBUG(++FactorCount[FactOther]);
   return (left->NodeQueueId > right->NodeQueueId);
 }
 
 // Bottom up
 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
   return BURRSort(left, right, SPQ);
 }
 
 // Source order, otherwise bottom up.
 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
   unsigned LOrder = SPQ->getNodeOrdering(left);
   unsigned ROrder = SPQ->getNodeOrdering(right);
 
@@ -2077,6 +2383,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
 
 // Return true if right should be scheduled with higher priority than left.
 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
   if (left->isCall || right->isCall)
     // No way to compute latency of calls.
     return BURRSort(left, right, SPQ);
@@ -2086,16 +2395,18 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   // Avoid causing spills. If register pressure is high, schedule for
   // register pressure reduction.
   if (LHigh && !RHigh) {
+    DEBUG(++FactorCount[FactPressureDiff]);
     DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
           << right->NodeNum << ")\n");
     return true;
   }
   else if (!LHigh && RHigh) {
+    DEBUG(++FactorCount[FactPressureDiff]);
     DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
           << left->NodeNum << ")\n");
     return false;
   }
-  else if (!LHigh && !RHigh) {
+  if (!LHigh && !RHigh) {
     int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
     if (result != 0)
       return result > 0;
@@ -2112,34 +2423,118 @@ bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
       != ScheduleHazardRecognizer::NoHazard)
     return false;
 
-  return SU->getHeight() <= CurCycle;
+  return true;
 }
 
+static bool canEnableCoalescing(SUnit *SU) {
+  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    // CopyToReg should be close to its uses to facilitate coalescing and
+    // avoid spilling.
+    return true;
+
+  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+      Opc == TargetOpcode::SUBREG_TO_REG ||
+      Opc == TargetOpcode::INSERT_SUBREG)
+    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+    // close to their uses to facilitate coalescing.
+    return true;
+
+  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+    // If SU does not have a register def, schedule it close to its uses
+    // because it does not lengthen any live ranges.
+    return true;
+
+  return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
   if (left->isCall || right->isCall)
     // No way to compute latency of calls.
     return BURRSort(left, right, SPQ);
 
-  bool LHigh = SPQ->HighRegPressure(left);
-  bool RHigh = SPQ->HighRegPressure(right);
-  // Avoid causing spills. If register pressure is high, schedule for
-  // register pressure reduction.
-  if (LHigh && !RHigh)
-    return true;
-  else if (!LHigh && RHigh)
-    return false;
-  else if (!LHigh && !RHigh) {
-    // Low register pressure situation, schedule to maximize instruction level
-    // parallelism.
-    if (left->NumPreds > right->NumPreds)
-      return false;
-    else if (left->NumPreds < right->NumPreds)
-      return false;
+  unsigned LLiveUses = 0, RLiveUses = 0;
+  int LPDiff = 0, RPDiff = 0;
+  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+  }
+  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+    DEBUG(++FactorCount[FactPressureDiff]);
+    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+    return LPDiff > RPDiff;
+  }
+
+  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+    bool LReduce = canEnableCoalescing(left);
+    bool RReduce = canEnableCoalescing(right);
+    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
+    if (LReduce && !RReduce) return false;
+    if (RReduce && !LReduce) return true;
+  }
+
+  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+    DEBUG(++FactorCount[FactRegUses]);
+    return LLiveUses < RLiveUses;
+  }
+
+  if (!DisableSchedStalls) {
+    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+    if (LStall != RStall) {
+      DEBUG(++FactorCount[FactHeight]);
+      return left->getHeight() > right->getHeight();
+    }
+  }
+
+  if (!DisableSchedCriticalPath) {
+    int spread = (int)left->getDepth() - (int)right->getDepth();
+    if (std::abs(spread) > MaxReorderWindow) {
+      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+            << left->getDepth() << " != SU(" << right->NodeNum << "): "
+            << right->getDepth() << "\n");
+      DEBUG(++FactorCount[FactDepth]);
+      return left->getDepth() < right->getDepth();
+    }
+  }
+
+  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+    int spread = (int)left->getHeight() - (int)right->getHeight();
+    if (std::abs(spread) > MaxReorderWindow) {
+      DEBUG(++FactorCount[FactHeight]);
+      return left->getHeight() > right->getHeight();
+    }
   }
 
   return BURRSort(left, right, SPQ);
 }
 
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+  SUnits = &sunits;
+  // Add pseudo dependency edges for two-address nodes.
+  AddPseudoTwoAddrDeps();
+  // Reroute edges to nodes with multiple uses.
+  if (!TracksRegPressure)
+    PrescheduleNodesWithMultipleUses();
+  // Calculate node priorities.
+  CalculateSethiUllmanNumbers();
+
+  // For single block loops, mark nodes that look like canonical IV increments.
+  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+      initVRegCycle(&sunits[i]);
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //                    Preschedule for Register Pressure
 //===----------------------------------------------------------------------===//
@@ -2417,6 +2812,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
 
 // Top down
 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res < 0;
+
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
   bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 477c1ff..9f2f012 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -27,12 +27,21 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(LoadsClustered, "Number of loads clustered together");
 
+// This allows latency based scheduler to notice high latency instructions
+// without a target itinerary. The choise if number here has more to do with
+// balancing scheduler heursitics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+  "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+  cl::desc("Roughly estimate the number of cycles that 'long latency'"
+           "instructions take for targets with no itinerary"));
+
 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
   : ScheduleDAG(mf),
     InstrItins(mf.getTarget().getInstrItineraryData()) {}
@@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SUnit *SU = NewSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
   SU->Latency = Old->Latency;
+  SU->isVRegCycle = Old->isVRegCycle;
   SU->isCall = Old->isCall;
+  SU->isCallOp = Old->isCallOp;
   SU->isTwoAddress = Old->isTwoAddress;
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
   SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  SU->isScheduleHigh = Old->isScheduleHigh;
+  SU->isScheduleLow = Old->isScheduleLow;
   SU->SchedulingPref = Old->SchedulingPref;
   Old->isCloned = true;
   return SU;
@@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
   Worklist.push_back(DAG->getRoot().getNode());
   Visited.insert(DAG->getRoot().getNode());
 
+  SmallVector<SUnit*, 8> CallSUnits;
   while (!Worklist.empty()) {
     SDNode *NI = Worklist.pop_back_val();
 
@@ -325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
       if (!HasGlueUse) break;
     }
 
+    if (NodeSUnit->isCall)
+      CallSUnits.push_back(NodeSUnit);
+
+    // Schedule zero-latency TokenFactor below any nodes that may increase the
+    // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+    // have false stalls.
+    if (NI->getOpcode() == ISD::TokenFactor)
+      NodeSUnit->isScheduleLow = true;
+
     // If there are glue operands involved, N is now the bottom-most node
     // of the sequence of nodes that are glued together.
     // Update the SUnit.
@@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     // Assign the Latency field of NodeSUnit using target-provided information.
     ComputeLatency(NodeSUnit);
   }
+
+  // Find all call operands.
+  while (!CallSUnits.empty()) {
+    SUnit *SU = CallSUnits.pop_back_val();
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->getOpcode() != ISD::CopyToReg)
+        continue;
+      SDNode *SrcN = SUNode->getOperand(2).getNode();
+      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
+      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+      SrcSU->isCallOp = true;
+    }
+  }
 }
 
 void ScheduleDAGSDNodes::AddSchedEdges() {
@@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
 
         // If this is a ctrl dep, latency is 1.
         unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+        // Special-case TokenFactor chains as zero-latency.
+        if(isChain && OpN->getOpcode() == ISD::TokenFactor)
+          OpLatency = 0;
+
         const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpLatency, PhysReg);
         if (!isChain && !UnitLatencies) {
@@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
           ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
         }
 
-        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
           // Multiple register uses are combined in the same SUnit. For example,
           // we could have a set of glued nodes with all their defs consumed by
           // another set of glued nodes. Register pressure tracking sees this as
           // a single use, so to keep pressure balanced we reduce the defs.
+          //
+          // We can't tell (without more book-keeping) if this results from
+          // glued nodes or duplicate operands. As long as we don't reduce
+          // NumRegDefsLeft to zero, we handle the common cases well.
           --OpSU->NumRegDefsLeft;
         }
       }
@@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
 
 // Initialize NumNodeDefs for the current Node's opcode.
 void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+  // Check for phys reg copy.
+  if (!Node)
+    return;
+
   if (!Node->isMachineOpcode()) {
     if (Node->getOpcode() == ISD::CopyFromReg)
       NodeNumDefs = 1;
@@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
 }
 
 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  SDNode *N = SU->getNode();
+
+  // TokenFactor operands are considered zero latency, and some schedulers
+  // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+  // whenever node latency is nonzero.
+  if (N && N->getOpcode() == ISD::TokenFactor) {
+    SU->Latency = 0;
+    return;
+  }
+
   // Check to see if the scheduler cares about latencies.
   if (ForceUnitLatencies()) {
     SU->Latency = 1;
@@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   }
 
   if (!InstrItins || InstrItins->isEmpty()) {
-    SU->Latency = 1;
+    if (N && N->isMachineOpcode() &&
+        TII->isHighLatencyDef(N->getMachineOpcode()))
+      SU->Latency = HighLatencyCycles;
+    else
+      SU->Latency = 1;
     return;
   }
 
@@ -573,7 +636,7 @@ namespace {
   };
 }
 
-/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node.
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
 static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
                                InstrEmitter &Emitter,
                     SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index cc7310e..b5f68f3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -80,6 +80,12 @@ namespace llvm {
     /// flagged together nodes with a single SUnit.
     virtual void BuildSchedGraph(AliasAnalysis *AA);
 
+    /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
+    /// CopyToReg and its only active data operands are CopyFromReg within a
+    /// single block loop.
+    ///
+    void InitVRegCycleFlag(SUnit *SU);
+
     /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
     ///
     void InitNumRegDefsLeft(SUnit *SU);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9120288..c2711c8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1418,9 +1418,9 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
 
 /// getShiftAmountOperand - Return the specified value casted to
 /// the target's desired shift amount type.
-SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
   EVT OpTy = Op.getValueType();
-  MVT ShTy = TLI.getShiftAmountTy(OpTy);
+  MVT ShTy = TLI.getShiftAmountTy(LHSTy);
   if (OpTy == ShTy || OpTy.isVector()) return Op;
 
   ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ?  ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -2482,6 +2482,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
            "Vector element count mismatch!");
     if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
       return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::UNDEF)
+      // sext(undef) = 0, because the top bits will all be the same.
+      return getConstant(0, VT);
     break;
   case ISD::ZERO_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2496,6 +2499,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
       return getNode(ISD::ZERO_EXTEND, DL, VT,
                      Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::UNDEF)
+      // zext(undef) = 0, because the top bits will be zero.
+      return getConstant(0, VT);
     break;
   case ISD::ANY_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2512,6 +2518,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
         OpOpcode == ISD::ANY_EXTEND)
       // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
       return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
 
     // (ext (trunx x)) -> x
     if (OpOpcode == ISD::TRUNCATE) {
@@ -5904,7 +5912,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UINT_TO_FP:  return "uint_to_fp";
   case ISD::FP_TO_SINT:  return "fp_to_sint";
   case ISD::FP_TO_UINT:  return "fp_to_uint";
-  case ISD::BITCAST:     return "bit_convert";
+  case ISD::BITCAST:     return "bitcast";
   case ISD::FP16_TO_FP32: return "fp16_to_fp32";
   case ISD::FP32_TO_FP16: return "fp32_to_fp16";
 
@@ -6226,6 +6234,9 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
     return;
 
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    // Don't follow chain operands.
+    if (N->getOperand(i).getValueType() == MVT::Other)
+      continue;
     OS << '\n';
     printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
   }
@@ -6238,7 +6249,7 @@ void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
 
 void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
   // Don't print impossibly deep things.
-  printrWithDepth(OS, G, 100);
+  printrWithDepth(OS, G, 10);
 }
 
 void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
@@ -6247,7 +6258,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
 
 void SDNode::dumprFull(const SelectionDAG *G) const {
   // Don't print impossibly deep things.
-  dumprWithDepth(G, 100);
+  dumprWithDepth(G, 10);
 }
 
 static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
@@ -6311,7 +6322,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
     case ISD::ROTL:
     case ISD::ROTR:
       Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
-                                getShiftAmountOperand(Operands[1])));
+                                getShiftAmountOperand(Operands[0].getValueType(),
+                                                      Operands[1])));
       break;
     case ISD::SIGN_EXTEND_INREG:
     case ISD::FP_ROUND_INREG: {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 48d9bbb..b02a7b6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -50,7 +50,6 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision",
 // %buffer = alloca [4096 x i8]
 // %data = load [4096 x i8]* %argPtr
 // store [4096 x i8] %data, [4096 x i8]* %buffer
-static cl::opt<unsigned>
-MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
-                  cl::init(64), cl::Hidden);
+static const unsigned MaxParallelChains = 64;
 
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                       const SDValue *Parts, unsigned NumParts,
@@ -1130,15 +1127,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         else if (F->paramHasAttr(0, Attribute::ZExt))
           ExtendKind = ISD::ZERO_EXTEND;
 
-        // FIXME: C calling convention requires the return type to be promoted
-        // to at least 32-bit. But this is not necessary for non-C calling
-        // conventions. The frontend should mark functions whose return values
-        // require promoting with signext or zeroext attributes.
-        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
-          if (VT.bitsLT(MinVT))
-            VT = MinVT;
-        }
+        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+          VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
 
         unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
         EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
@@ -1153,9 +1143,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
           Flags.setInReg();
 
         // Propagate extension type if any
-        if (F->paramHasAttr(0, Attribute::SExt))
+        if (ExtendKind == ISD::SIGN_EXTEND)
           Flags.setSExt();
-        else if (F->paramHasAttr(0, Attribute::ZExt))
+        else if (ExtendKind == ISD::ZERO_EXTEND)
           Flags.setZExt();
 
         for (unsigned i = 0; i < NumParts; ++i) {
@@ -2029,9 +2019,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
     APInt Range = ComputeRange(LEnd, RBegin);
     assert((Range - 2ULL).isNonNegative() &&
            "Invalid case distance");
-    double LDensity = (double)LSize.roundToDouble() /
+    // Use volatile double here to avoid excess precision issues on some hosts,
+    // e.g. that use 80-bit X87 registers.
+    volatile double LDensity =
+       (double)LSize.roundToDouble() /
                            (LEnd - First + 1ULL).roundToDouble();
-    double RDensity = (double)RSize.roundToDouble() /
+    volatile double RDensity =
+      (double)RSize.roundToDouble() /
                            (Last - RBegin + 1ULL).roundToDouble();
     double Metric = Range.logBase2()*(LDensity+RDensity);
     // Should always split in some non-trivial place
@@ -4039,10 +4033,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
   if (DV.isInlinedFnArgument(MF.getFunction()))
     return false;
 
-  MachineBasicBlock *MBB = FuncInfo.MBB;
-  if (MBB != &MF.front())
-    return false;
-
   unsigned Reg = 0;
   if (Arg->hasByValAttr()) {
     // Byval arguments' frame index is recorded during argument lowering.
@@ -4413,7 +4403,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::eh_sjlj_dispatch_setup: {
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
-                            getRoot(), getValue(I.getArgOperand(0))));
+                            getRoot()));
     return 0;
   }
 
@@ -4682,9 +4672,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::flt_rounds:
     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
     return 0;
-  case Intrinsic::trap:
-    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+  case Intrinsic::trap: {
+    StringRef TrapFuncName = getTrapFunctionName();
+    if (TrapFuncName.empty()) {
+      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+      return 0;
+    }
+    TargetLowering::ArgListTy Args;
+    std::pair<SDValue, SDValue> Result =
+      TLI.LowerCallTo(getRoot(), I.getType(),
+                 false, false, false, false, 0, CallingConv::C,
+                 /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+                 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+                 Args, DAG, getCurDebugLoc());
+    DAG.setRoot(Result.second);
     return 0;
+  }
   case Intrinsic::uadd_with_overflow:
     return implVisitAluOverflow(I, ISD::UADDO);
   case Intrinsic::sadd_with_overflow:
@@ -4937,15 +4940,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
              DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                          DAG.getVTList(&RetTys[0], RetTys.size()),
                          &ReturnValues[0], ReturnValues.size()));
-
   }
 
-  // As a special case, a null chain means that a tail call has been emitted and
-  // the DAG root is already updated.
-  if (Result.second.getNode())
-    DAG.setRoot(Result.second);
-  else
+  // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to a SDNode and visit() will not assign it an order number.
+  if (!Result.second.getNode()) {
+    // As a special case, a null chain means that a tail call has been emitted and
+    // the DAG root is already updated.
     HasTailCall = true;
+    ++SDNodeOrder;
+    AssignOrderingToNode(DAG.getRoot().getNode());
+  } else {
+    DAG.setRoot(Result.second);
+    ++SDNodeOrder;
+    AssignOrderingToNode(Result.second.getNode());
+  }
 
   if (LandingPad) {
     // Insert a label at the end of the invoke call to mark the try range.  This
@@ -5211,12 +5220,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
   LowerCallTo(&I, Callee, I.isTailCall());
 }
 
-namespace llvm {
+namespace {
 
 /// AsmOperandInfo - This contains information for each constraint that we are
 /// lowering.
-class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
-    public TargetLowering::AsmOperandInfo {
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
 public:
   /// CallOperand - If this is the result output operand or a clobber
   /// this is null, otherwise it is the incoming operand to the CallInst.
@@ -5304,7 +5312,7 @@ private:
 
 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
 
-} // end llvm namespace.
+} // end anonymous namespace
 
 /// isAllocatableRegister - If the specified register is safe to allocate,
 /// i.e. it isn't a stack pointer or some other special register, return the
@@ -5363,11 +5371,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
 ///   OpInfo describes the operand.
 ///   Input and OutputRegs are the set of already allocated physical registers.
 ///
-void SelectionDAGBuilder::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                     std::set<unsigned> &OutputRegs,
-                     std::set<unsigned> &InputRegs) {
-  LLVMContext &Context = FuncInfo.Fn->getContext();
+static void GetRegistersForValue(SelectionDAG &DAG,
+                                 const TargetLowering &TLI,
+                                 DebugLoc DL,
+                                 SDISelAsmOperandInfo &OpInfo,
+                                 std::set<unsigned> &OutputRegs,
+                                 std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = *DAG.getContext();
 
   // Compute whether this value requires an input register, an output register,
   // or both.
@@ -5413,7 +5423,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
       // vector types).
       EVT RegVT = *PhysReg.second->vt_begin();
       if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                          RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5423,7 +5433,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
         // machine.
         RegVT = EVT::getIntegerVT(Context,
                                   OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                          RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
       }
@@ -5694,7 +5704,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     // If this constraint is for a specific register, allocate it before
     // anything else.
     if (OpInfo.ConstraintType == TargetLowering::C_Register)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+                           InputRegs);
   }
 
   // Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5705,7 +5716,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     // C_Register operands have already been allocated, Other/Memory don't need
     // to be.
     if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+                           InputRegs);
   }
 
   // AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6181,7 +6193,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
 
   // For a function returning void, there is no return value. We can't create
   // such a node, so we just return a null return value in that case. In
-  // that case, nothing will actualy look at the value.
+  // that case, nothing will actually look at the value.
   if (ReturnValues.empty())
     return std::make_pair(SDValue(), Chain);
 
@@ -6397,7 +6409,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
       SDB->setValue(I, Res);
 
       // If this argument is live outside of the entry block, insert a copy from
-      // whereever we got it to the vreg that other BB's will reference it as.
+      // wherever we got it to the vreg that other BB's will reference it as.
       SDB->CopyToExportRegsIfNeeded(I);
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8f466d9..a689b76 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -23,7 +23,6 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <vector>
-#include <set>
 
 namespace llvm {
 
@@ -60,7 +59,6 @@ class MDNode;
 class PHINode;
 class PtrToIntInst;
 class ReturnInst;
-class SDISelAsmOperandInfo;
 class SDDbgValue;
 class SExtInst;
 class SelectInst;
@@ -380,10 +378,6 @@ public:
     assert(N.getNode() == 0 && "Already set a value for this node!");
     N = NewN;
   }
-  
-  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                            std::set<unsigned> &OutputRegs, 
-                            std::set<unsigned> &InputRegs);
 
   void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68ba966..fdf3767 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -421,10 +421,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   return true;
 }
 
-void
-SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
-                                   BasicBlock::const_iterator End,
-                                   bool &HadTailCall) {
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+                                        BasicBlock::const_iterator End,
+                                        bool &HadTailCall) {
   // Lower all of the non-terminator instructions. If a call is emitted
   // as a tail call, cease emitting nodes for this block. Terminators
   // are handled below.
@@ -438,7 +437,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
 
   // Final step, emit the lowered DAG as machine code.
   CodeGenAndEmitDAG();
-  return;
 }
 
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -489,13 +487,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   if (TimePassesIsEnabled)
     GroupName = "Instruction Selection and Scheduling";
   std::string BlockName;
+  int BlockNumber = -1;
+#ifdef NDEBUG
   if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
       ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
       ViewSUnitDAGs)
+#endif
+  {
+    BlockNumber = FuncInfo->MBB->getNumber();
     BlockName = MF->getFunction()->getNameStr() + ":" +
                 FuncInfo->MBB->getBasicBlock()->getNameStr();
-
-  DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
+  }
+  DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
 
@@ -505,7 +509,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(Unrestricted, *AA, OptLevel);
   }
 
-  DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   // Second step, hack on the DAG until it only uses operations and types that
   // the target supports.
@@ -518,7 +523,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     Changed = CurDAG->LegalizeTypes();
   }
 
-  DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   if (Changed) {
     if (ViewDAGCombineLT)
@@ -531,8 +537,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
     }
 
-    DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
-          CurDAG->dump());
+    DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+          << " '" << BlockName << "'\n"; CurDAG->dump());
   }
 
   {
@@ -556,8 +562,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
     }
 
-    DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
-          CurDAG->dump());
+    DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+          << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
   }
 
   if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
@@ -567,7 +573,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Legalize(OptLevel);
   }
 
-  DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
 
@@ -577,7 +584,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
   }
 
-  DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   if (OptLevel != CodeGenOpt::None)
     ComputeLiveOutVRegInfo();
@@ -591,7 +599,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     DoInstructionSelection();
   }
 
-  DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+        << " '" << BlockName << "'\n"; CurDAG->dump());
 
   if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
 
@@ -632,7 +641,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 }
 
 void SelectionDAGISel::DoInstructionSelection() {
-  DEBUG(errs() << "===== Instruction selection begins:\n");
+  DEBUG(errs() << "===== Instruction selection begins: BB#"
+        << FuncInfo->MBB->getNumber()
+        << " '" << FuncInfo->MBB->getName() << "'\n");
 
   PreprocessISelDAG();
 
@@ -735,16 +746,49 @@ void SelectionDAGISel::PrepareEHLandingPad() {
 
 
 
-
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst.  Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr.  For
+/// example we could have:
+///   A: x = load i32 *P
+///   B: y = icmp A, 42
+///   C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C".  We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
 bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             const Instruction *FoldInst,
                                              FastISel *FastIS) {
+  // We know that the load has a single use, but don't know what it is.  If it
+  // isn't one of the folded instructions, then we can't succeed here.  Handle
+  // this by scanning the single-use users of the load until we get to FoldInst.
+  unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
+  
+  const Instruction *TheUser = LI->use_back();
+  while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
+         // Stay in the right block.
+         TheUser->getParent() == FoldInst->getParent() &&
+         --MaxUsers) {  // Don't scan too far.
+    // If there are multiple or no uses of this instruction, then bail out.
+    if (!TheUser->hasOneUse())
+      return false;
+    
+    TheUser = TheUser->use_back();
+  }
+  
   // Don't try to fold volatile loads.  Target has to deal with alignment
   // constraints.
   if (LI->isVolatile()) return false;
 
-  // Figure out which vreg this is going into.
+  // Figure out which vreg this is going into.  If there is no assigned vreg yet
+  // then there actually was no reference to it.  Perhaps the load is referenced
+  // by a dead instruction.
   unsigned LoadReg = FastIS->getRegForValue(LI);
-  assert(LoadReg && "Load isn't already assigned a vreg? ");
+  if (LoadReg == 0)
+    return false;
 
   // Check to see what the uses of this vreg are.  If it has no uses, or more
   // than one use (at the machine instr level) then we can't fold it.
@@ -764,7 +808,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
          "The only use of the vreg must be a use, we haven't emitted the def!");
 
   MachineInstr *User = &*RI;
-  
+
   // Set the insertion point properly.  Folding the load can cause generation of
   // other random instructions (like sign extends) for addressing modes, make
   // sure they get inserted in a logical place before the new instruction.
@@ -817,6 +861,17 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) {
 }
 #endif
 
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+                                      FunctionLoweringInfo *FuncInfo) {
+  return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+         !isa<TerminatorInst>(I) && // Terminators aren't folded.
+         !isa<DbgInfoIntrinsic>(I) &&  // Debug instructions aren't folded.
+         !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
@@ -843,15 +898,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       }
 
       if (AllPredsVisited) {
-        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
-             I != E && isa<PHINode>(I); ++I) {
+        for (BasicBlock::const_iterator I = LLVMBB->begin();
+             isa<PHINode>(I); ++I)
           FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
-        }
       } else {
-        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
-             I != E && isa<PHINode>(I); ++I) {
+        for (BasicBlock::const_iterator I = LLVMBB->begin();
+             isa<PHINode>(I); ++I)
           FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
-        }
       }
 
       FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -899,10 +952,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         const Instruction *Inst = llvm::prior(BI);
 
         // If we no longer require this instruction, skip it.
-        if (!Inst->mayWriteToMemory() &&
-            !isa<TerminatorInst>(Inst) &&
-            !isa<DbgInfoIntrinsic>(Inst) &&
-            !FuncInfo->isExportedInst(Inst))
+        if (isFoldedOrDeadInstruction(Inst, FuncInfo))
           continue;
 
         // Bottom-up: reset the insert pos at the top, after any local-value
@@ -911,16 +961,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
         // Try to select the instruction with FastISel.
         if (FastIS->SelectInstruction(Inst)) {
-          // If fast isel succeeded, check to see if there is a single-use
-          // non-volatile load right before the selected instruction, and see if
-          // the load is used by the instruction.  If so, try to fold it.
-          const Instruction *BeforeInst = 0;
-          if (Inst != Begin)
-            BeforeInst = llvm::prior(llvm::prior(BI));
-          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
-              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
-              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
-            --BI; // If we succeeded, don't re-select the load.
+          // If fast isel succeeded, skip over all the folded instructions, and
+          // then see if there is a load right before the selected instructions.
+          // Try to fold the load if so.
+          const Instruction *BeforeInst = Inst;
+          while (BeforeInst != Begin) {
+            BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+            if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+              break;
+          }
+          if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+            // If we succeeded, don't re-select the load.
+            BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
           continue;
         }
 
@@ -974,11 +1028,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     else
       ++NumFastIselBlocks;
 
-    // Run SelectionDAG instruction selection on the remainder of the block
-    // not handled by FastISel. If FastISel is not run, this is the entire
-    // block.
-    bool HadTailCall;
-    SelectBasicBlock(Begin, BI, HadTailCall);
+    if (Begin != BI) {
+      // Run SelectionDAG instruction selection on the remainder of the block
+      // not handled by FastISel. If FastISel is not run, this is the entire
+      // block.
+      bool HadTailCall;
+      SelectBasicBlock(Begin, BI, HadTailCall);
+    }
 
     FinishBasicBlock();
     FuncInfo->PHINodesToUpdate.clear();
@@ -2392,6 +2448,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                               CurDAG->getRegister(RegNo, VT), (SDNode*)0));
       continue;
     }
+    case OPC_EmitRegister2: {
+      // For targets w/ more than 256 register names, the register enum
+      // values are stored in two bytes in the matcher table (just like
+      // opcodes).
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      unsigned RegNo = MatcherTable[MatcherIndex++];
+      RegNo |= MatcherTable[MatcherIndex++] << 8;
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+      continue;
+    }
 
     case OPC_EmitConvertToTarget:  {
       // Convert from IMM/FPIMM to target version.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 76eb945..cd1647b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -90,7 +90,8 @@ namespace llvm {
     /// If you want to override the dot attributes printed for a particular
     /// edge, override this method.
     template<typename EdgeIter>
-    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+                                         const SelectionDAG *Graph) {
       SDValue Op = EI.getNode()->getOperand(EI.getOperand());
       EVT VT = Op.getValueType();
       if (VT == MVT::Glue)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 35b847c..15606af 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -93,6 +93,19 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::UREM_I32] = "__umodsi3";
   Names[RTLIB::UREM_I64] = "__umoddi3";
   Names[RTLIB::UREM_I128] = "__umodti3";
+
+  // These are generally not available.
+  Names[RTLIB::SDIVREM_I8] = 0;
+  Names[RTLIB::SDIVREM_I16] = 0;
+  Names[RTLIB::SDIVREM_I32] = 0;
+  Names[RTLIB::SDIVREM_I64] = 0;
+  Names[RTLIB::SDIVREM_I128] = 0;
+  Names[RTLIB::UDIVREM_I8] = 0;
+  Names[RTLIB::UDIVREM_I16] = 0;
+  Names[RTLIB::UDIVREM_I32] = 0;
+  Names[RTLIB::UDIVREM_I64] = 0;
+  Names[RTLIB::UDIVREM_I128] = 0;
+
   Names[RTLIB::NEG_I32] = "__negsi2";
   Names[RTLIB::NEG_I64] = "__negdi2";
   Names[RTLIB::ADD_F32] = "__addsf3";
@@ -1665,6 +1678,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
         if (!ShAmt)
           break;
+        SDValue Shift = In.getOperand(1);
+        if (TLO.LegalTypes()) {
+          uint64_t ShVal = ShAmt->getZExtValue();
+          Shift =
+            TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+        }
+
         APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
                                                OperandBitWidth - BitWidth);
         HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
@@ -1678,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
                                                    Op.getValueType(),
                                                    NewTrunc,
-                                                   In.getOperand(1)));
+                                                   Shift));
         }
         break;
       }
@@ -1829,7 +1849,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                               ISD::CondCode Cond, bool foldBooleans,
                               DAGCombinerInfo &DCI, DebugLoc dl) const {
   SelectionDAG &DAG = DCI.DAG;
-  LLVMContext &Context = *DAG.getContext();
 
   // These setcc operations always fold.
   switch (Cond) {
@@ -1840,12 +1859,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
   }
 
-  if (isa<ConstantSDNode>(N0.getNode())) {
-    // Ensure that the constant occurs on the RHS, and fold constant
-    // comparisons.
+  // Ensure that the constant occurs on the RHS, and fold constant
+  // comparisons.
+  if (isa<ConstantSDNode>(N0.getNode()))
     return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-  }
-
+  
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
 
@@ -1898,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
     }
 
+    // (zext x) == C --> x == (trunc C)
+    if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      unsigned MinBits = N0.getValueSizeInBits();
+      SDValue PreZExt;
+      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+        // ZExt
+        MinBits = N0->getOperand(0).getValueSizeInBits();
+        PreZExt = N0->getOperand(0);
+      } else if (N0->getOpcode() == ISD::AND) {
+        // DAGCombine turns costly ZExts into ANDs
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+          if ((C->getAPIntValue()+1).isPowerOf2()) {
+            MinBits = C->getAPIntValue().countTrailingOnes();
+            PreZExt = N0->getOperand(0);
+          }
+      } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+        // ZEXTLOAD
+        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+          MinBits = LN0->getMemoryVT().getSizeInBits();
+          PreZExt = N0;
+        }
+      }
+
+      // Make sure we're not loosing bits from the constant.
+      if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+          // Will get folded away.
+          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+          SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+        }
+      }
+    }
+
     // If the LHS is '(and load, const)', the RHS is 0,
     // the test is for equality or unsigned, and all 1 bits of the const are
     // in the same partial word, see if we can shorten the load.
@@ -1936,7 +1990,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         }
       }
       if (bestWidth) {
-        EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
         if (newVT.isRound()) {
           EVT PtrType = Lod->getOperand(1).getValueType();
           SDValue Ptr = Lod->getBasePtr();
@@ -3174,26 +3228,39 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
 
   // FIXME: We should use a narrower constant when the upper
   // bits are known to be zero.
-  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
-  APInt::mu magics = N1C->getAPIntValue().magicu();
+  const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+  APInt::mu magics = N1C.magicu();
+
+  SDValue Q = N->getOperand(0);
+
+  // If the divisor is even, we can avoid using the expensive fixup by shifting
+  // the divided value upfront.
+  if (magics.a != 0 && !N1C[0]) {
+    unsigned Shift = N1C.countTrailingZeros();
+    Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+                    DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+    if (Created)
+      Created->push_back(Q.getNode());
+
+    // Get magic number for the shifted divisor.
+    magics = N1C.lshr(Shift).magicu(Shift);
+    assert(magics.a == 0 && "Should use cheap fixup now");
+  }
 
   // Multiply the numerator (operand 0) by the magic value
   // FIXME: We should support doing a MUL in a wider type
-  SDValue Q;
   if (isOperationLegalOrCustom(ISD::MULHU, VT))
-    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
-                    DAG.getConstant(magics.m, VT));
+    Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
   else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
-    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
-                              N->getOperand(0),
-                              DAG.getConstant(magics.m, VT)).getNode(), 1);
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+                            DAG.getConstant(magics.m, VT)).getNode(), 1);
   else
     return SDValue();       // No mulhu or equvialent
   if (Created)
     Created->push_back(Q.getNode());
 
   if (magics.a == 0) {
-    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+    assert(magics.s < N1C.getBitWidth() &&
            "We shouldn't generate an undefined shift!");
     return DAG.getNode(ISD::SRL, dl, VT, Q,
                  DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 7b5bca4..160f38f 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
   // Initialize data flow sets.
   clearAnticAvailSets();
 
-  // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
   bool changed = true;
   unsigned iterations = 0;
   while (changed) {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 2843c1a..35b8e14 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -104,6 +104,18 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) {
+  /// Joined copies are not deleted immediately, but kept in JoinedCopies.
+  JoinedCopies.insert(CopyMI);
+
+  /// Mark all register operands of CopyMI as <undef> so they won't affect dead
+  /// code elimination.
+  for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+       E = CopyMI->operands_end(); I != E; ++I)
+    if (I->isReg())
+      I->setIsUndef(true);
+}
+
 /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
 /// being the source and IntB being the dest, thus this defines a value number
 /// in IntB.  If the source value number (in IntA) is defined by a copy from B,
@@ -196,15 +208,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
   if (ValLR+1 != BLR) return false;
 
   // If a live interval is a physical register, conservatively check if any
-  // of its sub-registers is overlapping the live interval of the virtual
-  // register. If so, do not coalesce.
-  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
-      *tri_->getSubRegisters(IntB.reg)) {
-    for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
-      if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+  // of its aliases is overlapping the live interval of the virtual register.
+  // If so, do not coalesce.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+      if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
         DEBUG({
-            dbgs() << "\t\tInterfere with sub-register ";
-            li_->getInterval(*SR).print(dbgs(), tri_);
+            dbgs() << "\t\tInterfere with alias ";
+            li_->getInterval(*AS).print(dbgs(), tri_);
           });
         return false;
       }
@@ -471,7 +482,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
     DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
     assert(DVNI->def == DefIdx);
     BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
-    JoinedCopies.insert(UseMI);
+    markAsJoined(UseMI);
   }
 
   // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
@@ -901,6 +912,58 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
   return removeIntervalIfEmpty(li, li_, tri_);
 }
 
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// We need to be careful about coalescing a source physical register with a
+/// virtual register. Once the coalescing is done, it cannot be broken and these
+/// are not spillable! If the destination interval uses are far away, think
+/// twice about coalescing them!
+bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) {
+  bool Allocatable = li_->isAllocatable(CP.getDstReg());
+  LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+  /// Always join simple intervals that are defined by a single copy from a
+  /// reserved register. This doesn't increase register pressure, so it is
+  /// always beneficial.
+  if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
+    return true;
+
+  if (DisablePhysicalJoin) {
+    DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+    return false;
+  }
+
+  // Only coalesce to allocatable physreg, we don't want to risk modifying
+  // reserved registers.
+  if (!Allocatable) {
+    DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+    return false;  // Not coalescable.
+  }
+
+  // Don't join with physregs that have a ridiculous number of live
+  // ranges. The data structure performance is really bad when that
+  // happens.
+  if (li_->hasInterval(CP.getDstReg()) &&
+      li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+    ++numAborts;
+    DEBUG(dbgs()
+          << "\tPhysical register live interval too complicated, abort!\n");
+    return false;
+  }
+
+  // FIXME: Why are we skipping this test for partial copies?
+  //        CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+  if (!CP.isPartial()) {
+    const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+    unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+    unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+    if (Length > Threshold) {
+      ++numAborts;
+      DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+      return false;
+    }
+  }
+  return true;
+}
 
 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
 /// two virtual registers from different register classes.
@@ -973,27 +1036,25 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     return false;  // Not coalescable.
   }
 
-  if (DisablePhysicalJoin && CP.isPhys()) {
-    DEBUG(dbgs() << "\tPhysical joins disabled.\n");
-    return false;
-  }
-
-  DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_));
+  DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
+               << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+               << "\n");
 
   // Enforce policies.
   if (CP.isPhys()) {
-    DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n");
-    // Only coalesce to allocatable physreg.
-    if (!li_->isAllocatable(CP.getDstReg())) {
-      DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
-      return false;  // Not coalescable.
+    if (!shouldJoinPhys(CP)) {
+      // Before giving up coalescing, if definition of source is defined by
+      // trivial computation, try rematerializing it.
+      if (!CP.isFlipped() &&
+          ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+                                  CP.getDstReg(), 0, CopyMI))
+        return true;
+      return false;
     }
   } else {
-    DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
-                 << " to " << CP.getNewRC()->getName() << "\n");
-
     // Avoid constraining virtual register regclass too much.
     if (CP.isCrossClass()) {
+      DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
       if (DisableCrossClassJoin) {
         DEBUG(dbgs() << "\tCross-class joins disabled.\n");
         return false;
@@ -1002,8 +1063,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
                                  mri_->getRegClass(CP.getSrcReg()),
                                  mri_->getRegClass(CP.getDstReg()),
                                  CP.getNewRC())) {
-        DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
-                     << CP.getNewRC()->getName() << ".\n");
+        DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
         Again = true;  // May be possible to coalesce later.
         return false;
       }
@@ -1015,45 +1075,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       CP.flip();
   }
 
-  // We need to be careful about coalescing a source physical register with a
-  // virtual register. Once the coalescing is done, it cannot be broken and
-  // these are not spillable! If the destination interval uses are far away,
-  // think twice about coalescing them!
-  // FIXME: Why are we skipping this test for partial copies?
-  //        CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
-  if (!CP.isPartial() && CP.isPhys()) {
-    LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
-
-    // Don't join with physregs that have a ridiculous number of live
-    // ranges. The data structure performance is really bad when that
-    // happens.
-    if (li_->hasInterval(CP.getDstReg()) &&
-        li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
-      ++numAborts;
-      DEBUG(dbgs()
-           << "\tPhysical register live interval too complicated, abort!\n");
-      return false;
-    }
-
-    const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
-    unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
-    unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
-    if (Length > Threshold &&
-        std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
-                      mri_->use_nodbg_end()) * Threshold < Length) {
-      // Before giving up coalescing, if definition of source is defined by
-      // trivial computation, try rematerializing it.
-      if (!CP.isFlipped() &&
-          ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI))
-        return true;
-
-      ++numAborts;
-      DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
-      Again = true;  // May be possible to coalesce later.
-      return false;
-    }
-  }
-
   // Okay, attempt to join these two intervals.  On failure, this returns false.
   // Otherwise, if one of the intervals being joined is a physreg, this method
   // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
@@ -1072,7 +1093,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     if (!CP.isPartial()) {
       if (AdjustCopiesBackFrom(CP, CopyMI) ||
           RemoveCopyByCommutingDef(CP, CopyMI)) {
-        JoinedCopies.insert(CopyMI);
+        markAsJoined(CopyMI);
         DEBUG(dbgs() << "\tTrivial!\n");
         return true;
       }
@@ -1092,7 +1113,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
   }
 
   // Remember to delete the copy instruction.
-  JoinedCopies.insert(CopyMI);
+  markAsJoined(CopyMI);
 
   UpdateRegDefsUses(CP);
 
@@ -1568,9 +1589,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
       if (UseMI->isIdentityCopy())
         continue;
       SlotIndex Idx = li_->getInstructionIndex(UseMI);
-      // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
-      // that compares higher than any other interval.
-      if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+      if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) {
         LastUse = &Use;
         UseIdx = Idx.getUseIndex();
       }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 56703df..65cf542 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -68,16 +68,6 @@ namespace llvm {
       initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
     }
 
-    struct InstrSlots {
-      enum {
-        LOAD  = 0,
-        USE   = 1,
-        DEF   = 2,
-        STORE = 3,
-        NUM   = 4
-      };
-    };
-    
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual void releaseMemory();
 
@@ -148,6 +138,9 @@ namespace llvm {
                                  unsigned DstReg, unsigned DstSubIdx,
                                  MachineInstr *CopyMI);
 
+    /// shouldJoinPhys - Return true if a physreg copy should be joined.
+    bool shouldJoinPhys(CoalescerPair &CP);
+
     /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
     /// two virtual registers from different register classes.
     bool isWinToJoinCrossClass(unsigned SrcReg,
@@ -186,6 +179,9 @@ namespace llvm {
     /// cycles Start and End or NULL if there are no uses.
     MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
                                     unsigned Reg, SlotIndex &LastUseIdx) const;
+
+    /// markAsJoined - Remember that CopyMI has already been joined.
+    void markAsJoined(MachineInstr *CopyMI);
   };
 
 } // End llvm namespace
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 13e1454..43904a7 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -442,25 +442,18 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
   BasicBlock *DispatchBlock =
     BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
 
-  // Add a call to dispatch_setup at the start of the dispatch block. This is
-  // expanded to any target-specific setup that needs to be done.
-  Value *SetupArg =
-    CastInst::Create(Instruction::BitCast, FunctionContext,
-                     Type::getInt8PtrTy(F.getContext()), "",
-                     DispatchBlock);
-  CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock);
-
   // Insert a load of the callsite in the dispatch block, and a switch on its
-  // value.  By default, we go to a block that just does an unwind (which is the
-  // correct action for a standard call).
-  BasicBlock *UnwindBlock =
-    BasicBlock::Create(F.getContext(), "unwindbb", &F);
-  Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+  // value. By default, we issue a trap statement.
+  BasicBlock *TrapBlock =
+    BasicBlock::Create(F.getContext(), "trapbb", &F);
+  CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
+                   "", TrapBlock);
+  new UnreachableInst(F.getContext(), TrapBlock);
 
   Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
                                      DispatchBlock);
   SwitchInst *DispatchSwitch =
-    SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+    SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
                        DispatchBlock);
   // Split the entry block to insert the conditional branch for the setjmp.
   BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
@@ -524,6 +517,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
   Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
                                         "dispatch",
                                         EntryBB->getTerminator());
+
+  // Add a call to dispatch_setup after the setjmp call. This is expanded to any
+  // target-specific setup that needs to be done.
+  CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator());
+
   // check the return value of the setjmp. non-zero goes to dispatcher.
   Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
                                  ICmpInst::ICMP_EQ, DispatchVal, Zero,
@@ -564,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
   // Replace all unwinds with a branch to the unwind handler.
   // ??? Should this ever happen with sjlj exceptions?
   for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
-    BranchInst::Create(UnwindBlock, Unwinds[i]);
+    BranchInst::Create(TrapBlock, Unwinds[i]);
     Unwinds[i]->eraseFromParent();
   }
 
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 6e3fa90..ca79caf 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -10,47 +10,20 @@
 #define DEBUG_TYPE "slotindexes"
 
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
-
-// Yep - these are thread safe. See the header for details. 
-namespace {
-
-
-  class EmptyIndexListEntry : public IndexListEntry {
-  public:
-    EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
-  };
-
-  class TombstoneIndexListEntry : public IndexListEntry {
-  public:
-    TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
-  };
-
-  // The following statics are thread safe. They're read only, and you
-  // can't step from them to any other list entries.
-  ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
-  ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
-}
-
 char SlotIndexes::ID = 0;
 INITIALIZE_PASS(SlotIndexes, "slotindexes",
                 "Slot index numbering", false, false)
 
-IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
-  return &*IndexListEntryEmptyKey;
-}
-
-IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
-  return &*IndexListEntryTombstoneKey;
-}
-
+STATISTIC(NumLocalRenum,  "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
 
 void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
   au.setPreservesAll();
@@ -59,7 +32,7 @@ void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
 
 void SlotIndexes::releaseMemory() {
   mi2iMap.clear();
-  mbb2IdxMap.clear();
+  MBBRanges.clear();
   idx2MBBMap.clear();
   clearList();
 }
@@ -85,13 +58,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
          "Index list non-empty at initial numbering?");
   assert(idx2MBBMap.empty() &&
          "Index -> MBB mapping non-empty at initial numbering?");
-  assert(mbb2IdxMap.empty() &&
+  assert(MBBRanges.empty() &&
          "MBB -> Index mapping non-empty at initial numbering?");
   assert(mi2iMap.empty() &&
          "MachineInstr -> Index mapping non-empty at initial numbering?");
 
   functionSize = 0;
   unsigned index = 0;
+  MBBRanges.resize(mf->getNumBlockIDs());
+  idx2MBBMap.reserve(mf->size());
 
   push_back(createEntry(0, index));
 
@@ -103,8 +78,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
     // Insert an index for the MBB start.
     SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
 
-    index += SlotIndex::NUM;
-
     for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
          miItr != miEnd; ++miItr) {
       MachineInstr *mi = miItr;
@@ -112,32 +85,19 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
         continue;
 
       // Insert a store index for the instr.
-      push_back(createEntry(mi, index));
+      push_back(createEntry(mi, index += SlotIndex::InstrDist));
 
       // Save this base index in the maps.
-      mi2iMap.insert(
-        std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+      mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
  
       ++functionSize;
-
-      unsigned Slots = mi->getDesc().getNumDefs();
-      if (Slots == 0)
-        Slots = 1;
-
-      index += (Slots + 1) * SlotIndex::NUM;
     }
 
-    // We insert two blank instructions between basic blocks.
-    // One to represent live-out registers and one to represent live-ins.
-    push_back(createEntry(0, index));
-    index += SlotIndex::NUM;
-
-    push_back(createEntry(0, index));
-
-    SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
-    mbb2IdxMap.insert(
-      std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
+    // We insert one blank instructions between basic blocks.
+    push_back(createEntry(0, index += SlotIndex::InstrDist));
 
+    MBBRanges[mbb->getNumber()].first = blockStartIndex;
+    MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
     idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
   }
 
@@ -151,38 +111,41 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
 }
 
 void SlotIndexes::renumberIndexes() {
-
   // Renumber updates the index of every element of the index list.
-  // If all instrs in the function have been allocated an index (which has been
-  // placed in the index list in the order of instruction iteration) then the
-  // resulting numbering will match what would have been generated by the
-  // pass during the initial numbering of the function if the new instructions
-  // had been present.
   DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+  ++NumGlobalRenum;
 
-  functionSize = 0;
   unsigned index = 0;
 
   for (IndexListEntry *curEntry = front(); curEntry != getTail();
        curEntry = curEntry->getNext()) {
-
     curEntry->setIndex(index);
-
-    if (curEntry->getInstr() == 0) {
-      // MBB start entry. Just step index by 1.
-      index += SlotIndex::NUM;
-    }
-    else {
-      ++functionSize;
-      unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
-      if (Slots == 0)
-        Slots = 1;
-
-      index += (Slots + 1) * SlotIndex::NUM;
-    }
+    index += SlotIndex::InstrDist;
   }
 }
 
+// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+  // Number indexes with half the default spacing so we can catch up quickly.
+  const unsigned Space = SlotIndex::InstrDist/2;
+  assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+  IndexListEntry *start = curEntry->getPrev();
+  unsigned index = start->getIndex();
+  IndexListEntry *tail = getTail();
+  do {
+    curEntry->setIndex(index += Space);
+    curEntry = curEntry->getNext();
+    // If the next index is bigger, we have caught up.
+  } while (curEntry != tail && curEntry->getIndex() <= index);
+
+  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+               << index << " ***\n");
+  ++NumLocalRenum;
+}
+
+
 void SlotIndexes::dump() const {
   for (const IndexListEntry *itr = front(); itr != getTail();
        itr = itr->getNext()) {
@@ -195,11 +158,9 @@ void SlotIndexes::dump() const {
     }
   }
 
-  for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
-       itr != mbb2IdxMap.end(); ++itr) {
-    dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
-           << itr->second.first << ", " << itr->second.second << "]\n";
-  }
+  for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+    dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+           << MBBRanges[i].second << ")\n";
 }
 
 // Print a SlotIndex to a raw_ostream.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 9c0bf16..6949618 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -67,11 +67,11 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
 /// because all weights are positive.
 ///
 struct SpillPlacement::Node {
-  /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle.
+  /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle.
   /// Ideally, these two numbers should be identical, but inaccuracies in the
   /// block frequency estimates means that we need to normalize ingoing and
   /// outgoing frequencies separately so they are commensurate.
-  float Frequency[2];
+  float Scale[2];
 
   /// Bias - Normalized contributions from non-transparent blocks.
   /// A bundle connected to a MustSpill block has a huge negative bias,
@@ -107,7 +107,7 @@ struct SpillPlacement::Node {
 
   /// Node - Create a blank Node.
   Node() {
-    Frequency[0] = Frequency[1] = 0;
+    Scale[0] = Scale[1] = 0;
   }
 
   /// clear - Reset per-query data, but preserve frequencies that only depend on
@@ -121,7 +121,7 @@ struct SpillPlacement::Node {
   /// out=0 for an ingoing link, and 1 for an outgoing link.
   void addLink(unsigned b, float w, bool out) {
     // Normalize w relative to all connected blocks from that direction.
-    w /= Frequency[out];
+    w *= Scale[out];
 
     // There can be multiple links to the same bundle, add them up.
     for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
@@ -134,9 +134,10 @@ struct SpillPlacement::Node {
   }
 
   /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+  /// Return the change to the total number of positive biases.
   void addBias(float w, bool out) {
     // Normalize w relative to all connected blocks from that direction.
-    w /= Frequency[out];
+    w *= Scale[out];
     Bias += w;
   }
 
@@ -175,13 +176,22 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
   nodes = new Node[bundles->getNumBundles()];
 
   // Compute total ingoing and outgoing block frequencies for all bundles.
+  BlockFrequency.resize(mf.getNumBlockIDs());
   for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
-    float Freq = getBlockFrequency(I);
+    float Freq = LiveIntervals::getSpillWeight(true, false,
+                                               loops->getLoopDepth(I));
     unsigned Num = I->getNumber();
-    nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq;
-    nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq;
+    BlockFrequency[Num] = Freq;
+    nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
+    nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
   }
 
+  // Scales are reciprocal frequencies.
+  for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
+    for (unsigned d = 0; d != 2; ++d)
+      if (nodes[i].Scale[d] > 0)
+        nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
+
   // We never change the function.
   return false;
 }
@@ -200,31 +210,12 @@ void SpillPlacement::activate(unsigned n) {
 }
 
 
-/// prepareNodes - Compute node biases and weights from a set of constraints.
+/// addConstraints - Compute node biases and weights from a set of constraints.
 /// Set a bit in NodeMask for each active node.
-void SpillPlacement::
-prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
-  for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(),
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+  for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
        E = LiveBlocks.end(); I != E; ++I) {
-    MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number);
-    float Freq = getBlockFrequency(MBB);
-
-    // Is this a transparent block? Link ingoing and outgoing bundles.
-    if (I->Entry == DontCare && I->Exit == DontCare) {
-      unsigned ib = bundles->getBundle(I->Number, 0);
-      unsigned ob = bundles->getBundle(I->Number, 1);
-
-      // Ignore self-loops.
-      if (ib == ob)
-        continue;
-      activate(ib);
-      activate(ob);
-      nodes[ib].addLink(ob, Freq, 1);
-      nodes[ob].addLink(ib, Freq, 0);
-      continue;
-    }
-
-    // This block is not transparent, but it can still add bias.
+    float Freq = getBlockFrequency(I->Number);
     const float Bias[] = {
       0,           // DontCare,
       1,           // PrefReg,
@@ -248,10 +239,54 @@ prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
   }
 }
 
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+  for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+       ++I) {
+    unsigned Number = *I;
+    unsigned ib = bundles->getBundle(Number, 0);
+    unsigned ob = bundles->getBundle(Number, 1);
+
+    // Ignore self-loops.
+    if (ib == ob)
+      continue;
+    activate(ib);
+    activate(ob);
+    if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+      Linked.push_back(ib);
+    if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+      Linked.push_back(ob);
+    float Freq = getBlockFrequency(Number);
+    nodes[ib].addLink(ob, Freq, 1);
+    nodes[ob].addLink(ib, Freq, 0);
+  }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+  Linked.clear();
+  RecentPositive.clear();
+  for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+    nodes[n].update(nodes);
+    // A node that must spill, or a node without any links is not going to
+    // change its value ever again, so exclude it from iterations.
+    if (nodes[n].mustSpill())
+      continue;
+    if (!nodes[n].Links.empty())
+      Linked.push_back(n);
+    if (nodes[n].preferReg())
+      RecentPositive.push_back(n);
+  }
+  return !RecentPositive.empty();
+}
+
 /// iterate - Repeatedly update the Hopfield nodes until stability or the
 /// maximum number of iterations is reached.
 /// @param Linked - Numbers of linked nodes that need updating.
-void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
+void SpillPlacement::iterate() {
+  // First update the recently positive nodes. They have likely received new
+  // negative bias that will turn them off.
+  while (!RecentPositive.empty())
+    nodes[RecentPositive.pop_back_val()].update(nodes);
+
   if (Linked.empty())
     return;
 
@@ -267,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
     for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
            llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
       unsigned n = *I;
-      bool C = nodes[n].update(nodes);
-      Changed |= C;
+      if (nodes[n].update(nodes)) {
+        Changed = true;
+        if (nodes[n].preferReg())
+          RecentPositive.push_back(n);
+      }
     }
-    if (!Changed)
+    if (!Changed || !RecentPositive.empty())
       return;
 
     // Scan forwards, skipping the first node which was just updated.
@@ -278,53 +316,37 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
     for (SmallVectorImpl<unsigned>::const_iterator I =
            llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
       unsigned n = *I;
-      bool C = nodes[n].update(nodes);
-      Changed |= C;
+      if (nodes[n].update(nodes)) {
+        Changed = true;
+        if (nodes[n].preferReg())
+          RecentPositive.push_back(n);
+      }
     }
-    if (!Changed)
+    if (!Changed || !RecentPositive.empty())
       return;
   }
 }
 
-bool
-SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
-                            BitVector &RegBundles) {
+void SpillPlacement::prepare(BitVector &RegBundles) {
+  Linked.clear();
+  RecentPositive.clear();
   // Reuse RegBundles as our ActiveNodes vector.
   ActiveNodes = &RegBundles;
   ActiveNodes->clear();
   ActiveNodes->resize(bundles->getNumBundles());
+}
 
-  // Compute active nodes, links and biases.
-  prepareNodes(LiveBlocks);
-
-  // Update all active nodes, and find the ones that are actually linked to
-  // something so their value may change when iterating.
-  SmallVector<unsigned, 8> Linked;
-  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) {
-    nodes[n].update(nodes);
-    // A node that must spill, or a node without any links is not going to
-    // change its value ever again, so exclude it from iterations.
-    if (!nodes[n].Links.empty() && !nodes[n].mustSpill())
-      Linked.push_back(n);
-  }
-
-  // Iterate the network to convergence.
-  iterate(Linked);
+bool
+SpillPlacement::finish() {
+  assert(ActiveNodes && "Call prepare() first");
 
-  // Write preferences back to RegBundles.
+  // Write preferences back to ActiveNodes.
   bool Perfect = true;
-  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n))
+  for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
     if (!nodes[n].preferReg()) {
-      RegBundles.reset(n);
+      ActiveNodes->reset(n);
       Perfect = false;
     }
+  ActiveNodes = 0;
   return Perfect;
 }
-
-/// getBlockFrequency - Return our best estimate of the block frequency which is
-/// the expected number of block executions per function invocation.
-float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) {
-  // Use the unnormalized spill weight for real block frequencies.
-  return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB));
-}
-
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index ef2d516..6952ad8 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -10,8 +10,8 @@
 // This analysis computes the optimal spill code placement between basic blocks.
 //
 // The runOnMachineFunction() method only precomputes some profiling information
-// about the CFG. The real work is done by placeSpills() which is called by the
-// register allocator.
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
 //
 // Given a variable that is live across multiple basic blocks, and given
 // constraints on the basic blocks where the variable is live, determine which
@@ -27,6 +27,8 @@
 #ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
 #define LLVM_CODEGEN_SPILLPLACEMENT_H
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
@@ -35,7 +37,6 @@ class BitVector;
 class EdgeBundles;
 class MachineBasicBlock;
 class MachineLoopInfo;
-template <typename> class SmallVectorImpl;
 
 class SpillPlacement  : public MachineFunctionPass {
   struct Node;
@@ -44,10 +45,20 @@ class SpillPlacement  : public MachineFunctionPass {
   const MachineLoopInfo *loops;
   Node *nodes;
 
-  // Nodes that are active in the current computation. Owned by the placeSpills
+  // Nodes that are active in the current computation. Owned by the prepare()
   // caller.
   BitVector *ActiveNodes;
 
+  // Nodes with active links. Populated by scanActiveBundles.
+  SmallVector<unsigned, 8> Linked;
+
+  // Nodes that went positive during the last call to scanActiveBundles or
+  // iterate.
+  SmallVector<unsigned, 8> RecentPositive;
+
+  // Block frequencies are computed once. Indexed by block number.
+  SmallVector<float, 4> BlockFrequency;
+
 public:
   static char ID; // Pass identification, replacement for typeid.
 
@@ -70,28 +81,53 @@ public:
     BorderConstraint Exit : 8;  ///< Constraint on block exit.
   };
 
-  /// placeSpills - Compute the optimal spill code placement given the
-  /// constraints. No MustSpill constraints will be violated, and the smallest
-  /// possible number of PrefX constraints will be violated, weighted by
-  /// expected execution frequencies.
-  /// @param LiveBlocks Constraints for blocks that have the variable live in or
-  ///                   live out. DontCare/DontCare means the variable is live
-  ///                   through the block. DontCare/X means the variable is live
-  ///                   out, but not live in.
+  /// prepare - Reset state and prepare for a new spill placement computation.
   /// @param RegBundles Bit vector to receive the edge bundles where the
   ///                   variable should be kept in a register. Each bit
   ///                   corresponds to an edge bundle, a set bit means the
   ///                   variable should be kept in a register through the
   ///                   bundle. A clear bit means the variable should be
-  ///                   spilled.
+  ///                   spilled. This vector is retained.
+  void prepare(BitVector &RegBundles);
+
+  /// addConstraints - Add constraints and biases. This method may be called
+  /// more than once to accumulate constraints.
+  /// @param LiveBlocks Constraints for blocks that have the variable live in or
+  ///                   live out.
+  void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+  /// addLinks - Add transparent blocks with the given numbers.
+  void addLinks(ArrayRef<unsigned> Links);
+
+  /// scanActiveBundles - Perform an initial scan of all bundles activated by
+  /// addConstraints and addLinks, updating their state. Add all the bundles
+  /// that now prefer a register to RecentPositive.
+  /// Prepare internal data structures for iterate.
+  /// Return true is there are any positive nodes.
+  bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or new bundles
+  /// are found.
+  void iterate();
+
+  /// getRecentPositive - Return an array of bundles that became positive during
+  /// the previous call to scanActiveBundles or iterate.
+  ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+  /// finish - Compute the optimal spill code placement given the
+  /// constraints. No MustSpill constraints will be violated, and the smallest
+  /// possible number of PrefX constraints will be violated, weighted by
+  /// expected execution frequencies.
+  /// The selected bundles are returned in the bitvector passed to prepare().
   /// @return True if a perfect solution was found, allowing the variable to be
   ///         in a register through all relevant bundles.
-  bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
-                   BitVector &RegBundles);
+  bool finish();
 
   /// getBlockFrequency - Return the estimated block execution frequency per
   /// function invocation.
-  float getBlockFrequency(const MachineBasicBlock*);
+  float getBlockFrequency(unsigned Number) const {
+    return BlockFrequency[Number];
+  }
 
 private:
   virtual bool runOnMachineFunction(MachineFunction&);
@@ -99,8 +135,6 @@ private:
   virtual void releaseMemory();
 
   void activate(unsigned);
-  void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
-  void iterate(const SmallVectorImpl<unsigned>&);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index fd38582..b6bbcd7 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,6 +11,7 @@
 
 #include "Spiller.h"
 #include "VirtRegMap.h"
+#include "LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,7 +25,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <set>
 
 using namespace llvm;
 
@@ -180,11 +180,9 @@ public:
                  VirtRegMap &vrm)
     : SpillerBase(pass, mf, vrm) {}
 
-  void spill(LiveInterval *li,
-             SmallVectorImpl<LiveInterval*> &newIntervals,
-             const SmallVectorImpl<LiveInterval*> &) {
+  void spill(LiveRangeEdit &LRE) {
     // Ignore spillIs - we don't use it.
-    trivialSpillEverywhere(li, newIntervals);
+    trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
   }
 };
 
@@ -210,22 +208,22 @@ public:
       vrm(&vrm) {}
 
   /// Falls back on LiveIntervals::addIntervalsForSpills.
-  void spill(LiveInterval *li,
-             SmallVectorImpl<LiveInterval*> &newIntervals,
-             const SmallVectorImpl<LiveInterval*> &spillIs) {
+  void spill(LiveRangeEdit &LRE) {
     std::vector<LiveInterval*> added =
-      lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
-    newIntervals.insert(newIntervals.end(), added.begin(), added.end());
+      lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
+                                 loopInfo, *vrm);
+    LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
+                              added.begin(), added.end());
 
     // Update LiveStacks.
-    int SS = vrm->getStackSlot(li->reg);
+    int SS = vrm->getStackSlot(LRE.getReg());
     if (SS == VirtRegMap::NO_STACK_SLOT)
       return;
-    const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg);
+    const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
     LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
     if (!SI.hasAtLeastOneValue())
       SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
-    SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0));
+    SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
   }
 };
 
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index f017583..41f1727 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -12,11 +12,9 @@
 
 namespace llvm {
 
-  class LiveInterval;
+  class LiveRangeEdit;
   class MachineFunction;
   class MachineFunctionPass;
-  class SlotIndex;
-  template <typename T> class SmallVectorImpl;
   class VirtRegMap;
 
   /// Spiller interface.
@@ -27,16 +25,8 @@ namespace llvm {
   public:
     virtual ~Spiller() = 0;
 
-    /// spill - Spill the given live interval. The method used will depend on
-    /// the Spiller implementation selected.
-    ///
-    /// @param li            The live interval to be spilled.
-    /// @param spillIs       A list of intervals that are about to be spilled,
-    ///                      and so cannot be used for remat etc.
-    /// @param newIntervals  The newly created intervals will be appended here.
-    virtual void spill(LiveInterval *li,
-                       SmallVectorImpl<LiveInterval*> &newIntervals,
-                       const SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+    /// spill - Spill the LRE.getParent() live interval.
+    virtual void spill(LiveRangeEdit &LRE) = 0;
 
   };
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index fd5d50b..ac9d72b 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -16,12 +16,11 @@
 #include "SplitKit.h"
 #include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -29,9 +28,8 @@
 
 using namespace llvm;
 
-static cl::opt<bool>
-AllowSplit("spiller-splits-edges",
-           cl::desc("Allow critical edge splitting during spilling"));
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple,   "Number of splits that were simple");
 
 //===----------------------------------------------------------------------===//
 //                                 Split Analysis
@@ -45,49 +43,105 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
     LIS(lis),
     Loops(mli),
     TII(*MF.getTarget().getInstrInfo()),
-    CurLI(0) {}
+    CurLI(0),
+    LastSplitPoint(MF.getNumBlockIDs()) {}
 
 void SplitAnalysis::clear() {
   UseSlots.clear();
-  UsingInstrs.clear();
-  UsingBlocks.clear();
-  LiveBlocks.clear();
+  UseBlocks.clear();
+  ThroughBlocks.clear();
   CurLI = 0;
 }
 
-bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
-  MachineBasicBlock *T, *F;
-  SmallVector<MachineOperand, 4> Cond;
-  return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+  const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+  const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+  std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+
+  // Compute split points on the first call. The pair is independent of the
+  // current live interval.
+  if (!LSP.first.isValid()) {
+    MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+    if (FirstTerm == MBB->end())
+      LSP.first = LIS.getMBBEndIdx(MBB);
+    else
+      LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+    // If there is a landing pad successor, also find the call instruction.
+    if (!LPad)
+      return LSP.first;
+    // There may not be a call instruction (?) in which case we ignore LPad.
+    LSP.second = LSP.first;
+    for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin();
+         I != E; --I)
+      if (I->getDesc().isCall()) {
+        LSP.second = LIS.getInstructionIndex(I);
+        break;
+      }
+  }
+
+  // If CurLI is live into a landing pad successor, move the last split point
+  // back to the call that may throw.
+  if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
+    return LSP.second;
+  else
+    return LSP.first;
 }
 
 /// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
 void SplitAnalysis::analyzeUses() {
+  assert(UseSlots.empty() && "Call clear first");
+
+  // First get all the defs from the interval values. This provides the correct
+  // slots for early clobbers.
+  for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+       E = CurLI->vni_end(); I != E; ++I)
+    if (!(*I)->isPHIDef() && !(*I)->isUnused())
+      UseSlots.push_back((*I)->def);
+
+  // Get use slots form the use-def chain.
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg),
-       E = MRI.reg_end(); I != E; ++I) {
-    MachineOperand &MO = I.getOperand();
-    if (MO.isUse() && MO.isUndef())
-      continue;
-    MachineInstr *MI = MO.getParent();
-    if (MI->isDebugValue() || !UsingInstrs.insert(MI))
-      continue;
-    UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex());
-    MachineBasicBlock *MBB = MI->getParent();
-    UsingBlocks[MBB]++;
-  }
+  for (MachineRegisterInfo::use_nodbg_iterator
+       I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+       ++I)
+    if (!I.getOperand().isUndef())
+      UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+
   array_pod_sort(UseSlots.begin(), UseSlots.end());
-  calcLiveBlockInfo();
-  DEBUG(dbgs() << "  counted "
-               << UsingInstrs.size() << " instrs, "
-               << UsingBlocks.size() << " blocks.\n");
+
+  // Remove duplicates, keeping the smaller slot for each instruction.
+  // That is what we want for early clobbers.
+  UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+                             SlotIndex::isSameInstr),
+                 UseSlots.end());
+
+  // Compute per-live block info.
+  if (!calcLiveBlockInfo()) {
+    // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+    // I am looking at you, SimpleRegisterCoalescing!
+    DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+    const_cast<LiveIntervals&>(LIS)
+      .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+    UseBlocks.clear();
+    ThroughBlocks.clear();
+    bool fixed = calcLiveBlockInfo();
+    (void)fixed;
+    assert(fixed && "Couldn't fix broken live interval");
+  }
+
+  DEBUG(dbgs() << "Analyze counted "
+               << UseSlots.size() << " instrs in "
+               << UseBlocks.size() << " blocks, through "
+               << NumThroughBlocks << " blocks.\n");
 }
 
 /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
 /// where CurLI is live.
-void SplitAnalysis::calcLiveBlockInfo() {
+bool SplitAnalysis::calcLiveBlockInfo() {
+  ThroughBlocks.resize(MF.getNumBlockIDs());
+  NumThroughBlocks = 0;
   if (CurLI->empty())
-    return;
+    return true;
 
   LiveInterval::const_iterator LVI = CurLI->begin();
   LiveInterval::const_iterator LVE = CurLI->end();
@@ -104,24 +158,14 @@ void SplitAnalysis::calcLiveBlockInfo() {
     SlotIndex Start, Stop;
     tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
-    // The last split point is the latest possible insertion point that dominates
-    // all successor blocks. If interference reaches LastSplitPoint, it is not
-    // possible to insert a split or reload that makes CurLI live in the
-    // outgoing bundle.
-    MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB);
-    if (LSP == BI.MBB->end())
-      BI.LastSplitPoint = Stop;
-    else
-      BI.LastSplitPoint = LIS.getInstructionIndex(LSP);
-
     // LVI is the first live segment overlapping MBB.
     BI.LiveIn = LVI->start <= Start;
     if (!BI.LiveIn)
       BI.Def = LVI->start;
 
     // Find the first and last uses in the block.
-    BI.Uses = hasUses(MFI);
-    if (BI.Uses && UseI != UseE) {
+    bool Uses = UseI != UseE && *UseI < Stop;
+    if (Uses) {
       BI.FirstUse = *UseI;
       assert(BI.FirstUse >= Start);
       do ++UseI;
@@ -149,7 +193,16 @@ void SplitAnalysis::calcLiveBlockInfo() {
 
     // Don't set LiveThrough when the block has a gap.
     BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut;
-    LiveBlocks.push_back(BI);
+    if (Uses)
+      UseBlocks.push_back(BI);
+    else {
+      ++NumThroughBlocks;
+      ThroughBlocks.set(BI.MBB->getNumber());
+    }
+    // FIXME: This should never happen. The live range stops or starts without a
+    // corresponding use. An earlier pass did something wrong.
+    if (!BI.LiveThrough && !Uses)
+      return false;
 
     // LVI is now at LVE or LVI->end >= Stop.
     if (LVI == LVE)
@@ -165,6 +218,30 @@ void SplitAnalysis::calcLiveBlockInfo() {
     else
       MFI = LIS.getMBBFromIndex(LVI->start);
   }
+  return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+  if (cli->empty())
+    return 0;
+  LiveInterval *li = const_cast<LiveInterval*>(cli);
+  LiveInterval::iterator LVI = li->begin();
+  LiveInterval::iterator LVE = li->end();
+  unsigned Count = 0;
+
+  // Loop over basic blocks where li is live.
+  MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start);
+  SlotIndex Stop = LIS.getMBBEndIdx(MFI);
+  for (;;) {
+    ++Count;
+    LVI = li->advanceTo(LVI, Stop);
+    if (LVI == LVE)
+      return Count;
+    do {
+      ++MFI;
+      Stop = LIS.getMBBEndIdx(MFI);
+    } while (Stop <= LVI->start);
+  }
 }
 
 bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
@@ -181,15 +258,6 @@ bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
   return I != Orig.begin() && (--I)->end == Idx;
 }
 
-void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const {
-  for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) {
-    unsigned count = UsingBlocks.lookup(*I);
-    OS << " BB#" << (*I)->getNumber();
-    if (count)
-      OS << '(' << count << ')';
-  }
-}
-
 void SplitAnalysis::analyze(const LiveInterval *li) {
   clear();
   CurLI = li;
@@ -198,171 +266,242 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
 
 
 //===----------------------------------------------------------------------===//
-//                               LiveIntervalMap
+//                               Split Editor
 //===----------------------------------------------------------------------===//
 
-// Work around the fact that the std::pair constructors are broken for pointer
-// pairs in some implementations. makeVV(x, 0) works.
-static inline std::pair<const VNInfo*, VNInfo*>
-makeVV(const VNInfo *a, VNInfo *b) {
-  return std::make_pair(a, b);
-}
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+                         LiveIntervals &lis,
+                         VirtRegMap &vrm,
+                         MachineDominatorTree &mdt)
+  : SA(sa), LIS(lis), VRM(vrm),
+    MRI(vrm.getMachineFunction().getRegInfo()),
+    MDT(mdt),
+    TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+    TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+    Edit(0),
+    OpenIdx(0),
+    RegAssign(Allocator)
+{}
 
-void LiveIntervalMap::reset(LiveInterval *li) {
-  LI = li;
+void SplitEditor::reset(LiveRangeEdit &lre) {
+  Edit = &lre;
+  OpenIdx = 0;
+  RegAssign.clear();
   Values.clear();
-  LiveOutCache.clear();
+
+  // We don't need to clear LiveOutCache, only LiveOutSeen entries are read.
+  LiveOutSeen.clear();
+
+  // We don't need an AliasAnalysis since we will only be performing
+  // cheap-as-a-copy remats anyway.
+  Edit->anyRematerializable(LIS, TII, 0);
 }
 
-bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const {
-  ValueMap::const_iterator i = Values.find(ParentVNI);
-  return i != Values.end() && i->second == 0;
+void SplitEditor::dump() const {
+  if (RegAssign.empty()) {
+    dbgs() << " empty\n";
+    return;
+  }
+
+  for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+    dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+  dbgs() << '\n';
 }
 
-// defValue - Introduce a LI def for ParentVNI that could be later than
-// ParentVNI->def.
-VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
-  assert(LI && "call reset first");
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+                              const VNInfo *ParentVNI,
+                              SlotIndex Idx) {
   assert(ParentVNI && "Mapping  NULL value");
   assert(Idx.isValid() && "Invalid SlotIndex");
-  assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+  assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+  LiveInterval *LI = Edit->get(RegIdx);
 
   // Create a new value.
   VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
 
-  // Preserve the PHIDef bit.
-  if (ParentVNI->isPHIDef() && Idx == ParentVNI->def)
-    VNI->setIsPHIDef(true);
-
   // Use insert for lookup, so we can add missing values with a second lookup.
-  std::pair<ValueMap::iterator,bool> InsP =
-    Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0));
+  std::pair<ValueMap::iterator, bool> InsP =
+    Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI));
+
+  // This was the first time (RegIdx, ParentVNI) was mapped.
+  // Keep it as a simple def without any liveness.
+  if (InsP.second)
+    return VNI;
 
-  // This is now a complex def. Mark with a NULL in valueMap.
-  if (!InsP.second)
+  // If the previous value was a simple mapping, add liveness for it now.
+  if (VNInfo *OldVNI = InsP.first->second) {
+    SlotIndex Def = OldVNI->def;
+    LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+    // No longer a simple mapping.
     InsP.first->second = 0;
+  }
+
+  // This is a complex mapping, add liveness for VNI
+  SlotIndex Def = VNI->def;
+  LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
 
   return VNI;
 }
 
-
-// mapValue - Find the mapped value for ParentVNI at Idx.
-// Potentially create phi-def values.
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx,
-                                  bool *simple) {
-  assert(LI && "call reset first");
+void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) {
   assert(ParentVNI && "Mapping  NULL value");
-  assert(Idx.isValid() && "Invalid SlotIndex");
-  assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+  VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)];
 
-  // Use insert for lookup, so we can add missing values with a second lookup.
-  std::pair<ValueMap::iterator,bool> InsP =
-    Values.insert(makeVV(ParentVNI, 0));
-
-  // This was an unknown value. Create a simple mapping.
-  if (InsP.second) {
-    if (simple) *simple = true;
-    return InsP.first->second = LI->createValueCopy(ParentVNI,
-                                                     LIS.getVNInfoAllocator());
-  }
+  // ParentVNI was either unmapped or already complex mapped. Either way.
+  if (!VNI)
+    return;
 
-  // This was a simple mapped value.
-  if (InsP.first->second) {
-    if (simple) *simple = true;
-    return InsP.first->second;
-  }
+  // This was previously a single mapping. Make sure the old def is represented
+  // by a trivial live range.
+  SlotIndex Def = VNI->def;
+  Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+  VNI = 0;
+}
 
-  // This is a complex mapped value. There may be multiple defs, and we may need
-  // to create phi-defs.
-  if (simple) *simple = false;
+// extendRange - Extend the live range to reach Idx.
+// Potentially create phi-def values.
+void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) {
+  assert(Idx.isValid() && "Invalid SlotIndex");
   MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
   assert(IdxMBB && "No MBB at Idx");
+  LiveInterval *LI = Edit->get(RegIdx);
 
   // Is there a def in the same MBB we can extend?
-  if (VNInfo *VNI = extendTo(IdxMBB, Idx))
-    return VNI;
+  if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx))
+    return;
 
   // Now for the fun part. We know that ParentVNI potentially has multiple defs,
   // and we may need to create even more phi-defs to preserve VNInfo SSA form.
   // Perform a search for all predecessor blocks where we know the dominating
-  // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
-  DEBUG(dbgs() << "\n  Reaching defs for BB#" << IdxMBB->getNumber()
-               << " at " << Idx << " in " << *LI << '\n');
+  // VNInfo.
+  VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot());
+
+  // When there were multiple different values, we may need new PHIs.
+  if (!VNI)
+    return updateSSA();
+
+  // Poor man's SSA update for the single-value case.
+  LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]);
+  for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+         E = LiveInBlocks.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I->DomNode->getBlock();
+    SlotIndex Start = LIS.getMBBStartIdx(MBB);
+    if (I->Kill.isValid())
+      LI->addRange(LiveRange(Start, I->Kill, VNI));
+    else {
+      LiveOutCache[MBB] = LOP;
+      LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+    }
+  }
+}
+
+/// findReachingDefs - Search the CFG for known live-out values.
+/// Add required live-in blocks to LiveInBlocks.
+VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI,
+                                      MachineBasicBlock *KillMBB,
+                                      SlotIndex Kill) {
+  // Initialize the live-out cache the first time it is needed.
+  if (LiveOutSeen.empty()) {
+    unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+    LiveOutSeen.resize(N);
+    LiveOutCache.resize(N);
+  }
 
   // Blocks where LI should be live-in.
-  SmallVector<MachineDomTreeNode*, 16> LiveIn;
-  LiveIn.push_back(MDT[IdxMBB]);
+  SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+
+  // Remember if we have seen more than one value.
+  bool UniqueVNI = true;
+  VNInfo *TheVNI = 0;
 
   // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
-  for (unsigned i = 0; i != LiveIn.size(); ++i) {
-    MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+  for (unsigned i = 0; i != WorkList.size(); ++i) {
+    MachineBasicBlock *MBB = WorkList[i];
+    assert(!MBB->pred_empty() && "Value live-in to entry block?");
     for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
            PE = MBB->pred_end(); PI != PE; ++PI) {
        MachineBasicBlock *Pred = *PI;
+       LiveOutPair &LOP = LiveOutCache[Pred];
+
        // Is this a known live-out block?
-       std::pair<LiveOutMap::iterator,bool> LOIP =
-         LiveOutCache.insert(std::make_pair(Pred, LiveOutPair()));
-       // Yes, we have been here before.
-       if (!LOIP.second) {
-         DEBUG(if (VNInfo *VNI = LOIP.first->second.first)
-                 dbgs() << "    known valno #" << VNI->id
-                        << " at BB#" << Pred->getNumber() << '\n');
+       if (LiveOutSeen.test(Pred->getNumber())) {
+         if (VNInfo *VNI = LOP.first) {
+           if (TheVNI && TheVNI != VNI)
+             UniqueVNI = false;
+           TheVNI = VNI;
+         }
          continue;
        }
 
+       // First time. LOP is garbage and must be cleared below.
+       LiveOutSeen.set(Pred->getNumber());
+
        // Does Pred provide a live-out value?
-       SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot();
-       if (VNInfo *VNI = extendTo(Pred, Last)) {
-         MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def);
-         DEBUG(dbgs() << "    found valno #" << VNI->id
-                      << " from BB#" << DefMBB->getNumber()
-                      << " at BB#" << Pred->getNumber() << '\n');
-         LiveOutPair &LOP = LOIP.first->second;
-         LOP.first = VNI;
-         LOP.second = MDT[DefMBB];
+       SlotIndex Start, Last;
+       tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred);
+       Last = Last.getPrevSlot();
+       VNInfo *VNI = LI->extendInBlock(Start, Last);
+       LOP.first = VNI;
+       if (VNI) {
+         LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)];
+         if (TheVNI && TheVNI != VNI)
+           UniqueVNI = false;
+         TheVNI = VNI;
          continue;
        }
+       LOP.second = 0;
+
        // No, we need a live-in value for Pred as well
-       if (Pred != IdxMBB)
-         LiveIn.push_back(MDT[Pred]);
+       if (Pred != KillMBB)
+          WorkList.push_back(Pred);
+       else
+          // Loopback to KillMBB, so value is really live through.
+         Kill = SlotIndex();
     }
   }
 
-  // We may need to add phi-def values to preserve the SSA form.
+  // Transfer WorkList to LiveInBlocks in reverse order.
+  // This ordering works best with updateSSA().
+  LiveInBlocks.clear();
+  LiveInBlocks.reserve(WorkList.size());
+  while(!WorkList.empty())
+    LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]);
+
+  // The kill block may not be live-through.
+  assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB);
+  LiveInBlocks.back().Kill = Kill;
+
+  return UniqueVNI ? TheVNI : 0;
+}
+
+void SplitEditor::updateSSA() {
   // This is essentially the same iterative algorithm that SSAUpdater uses,
   // except we already have a dominator tree, so we don't have to recompute it.
-  VNInfo *IdxVNI = 0;
   unsigned Changes;
   do {
     Changes = 0;
-    DEBUG(dbgs() << "  Iterating over " << LiveIn.size() << " blocks.\n");
-    // Propagate live-out values down the dominator tree, inserting phi-defs when
-    // necessary. Since LiveIn was created by a BFS, going backwards makes it more
-    // likely for us to visit immediate dominators before their children.
-    for (unsigned i = LiveIn.size(); i; --i) {
-      MachineDomTreeNode *Node = LiveIn[i-1];
+    // Propagate live-out values down the dominator tree, inserting phi-defs
+    // when necessary.
+    for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+           E = LiveInBlocks.end(); I != E; ++I) {
+      MachineDomTreeNode *Node = I->DomNode;
+      // Skip block if the live-in value has already been determined.
+      if (!Node)
+        continue;
       MachineBasicBlock *MBB = Node->getBlock();
       MachineDomTreeNode *IDom = Node->getIDom();
       LiveOutPair IDomValue;
+
       // We need a live-in value to a block with no immediate dominator?
       // This is probably an unreachable block that has survived somehow.
-      bool needPHI = !IDom;
+      bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber());
 
-      // Get the IDom live-out value.
-      if (!needPHI) {
-        LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock());
-        if (I != LiveOutCache.end())
-          IDomValue = I->second;
-        else
-          // If IDom is outside our set of live-out blocks, there must be new
-          // defs, and we need a phi-def here.
-          needPHI = true;
-      }
-
-      // IDom dominates all of our predecessors, but it may not be the immediate
-      // dominator. Check if any of them have live-out values that are properly
-      // dominated by IDom. If so, we need a phi-def here.
+      // IDom dominates all of our predecessors, but it may not be their
+      // immediate dominator. Check if any of them have live-out values that are
+      // properly dominated by IDom. If so, we need a phi-def here.
       if (!needPHI) {
+        IDomValue = LiveOutCache[IDom->getBlock()];
         for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
                PE = MBB->pred_end(); PI != PE; ++PI) {
           LiveOutPair Value = LiveOutCache[*PI];
@@ -378,215 +517,57 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx,
         }
       }
 
+      // The value may be live-through even if Kill is set, as can happen when
+      // we are called from extendRange. In that case LiveOutSeen is true, and
+      // LiveOutCache indicates a foreign or missing value.
+      LiveOutPair &LOP = LiveOutCache[MBB];
+
       // Create a phi-def if required.
       if (needPHI) {
         ++Changes;
         SlotIndex Start = LIS.getMBBStartIdx(MBB);
+        unsigned RegIdx = RegAssign.lookup(Start);
+        LiveInterval *LI = Edit->get(RegIdx);
         VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
         VNI->setIsPHIDef(true);
-        DEBUG(dbgs() << "    - BB#" << MBB->getNumber()
-                     << " phi-def #" << VNI->id << " at " << Start << '\n');
-        // We no longer need LI to be live-in.
-        LiveIn.erase(LiveIn.begin()+(i-1));
-        // Blocks in LiveIn are either IdxMBB, or have a value live-through.
-        if (MBB == IdxMBB)
-          IdxVNI = VNI;
-        // Check if we need to update live-out info.
-        LiveOutMap::iterator I = LiveOutCache.find(MBB);
-        if (I == LiveOutCache.end() || I->second.second == Node) {
-          // We already have a live-out defined in MBB, so this must be IdxMBB.
-          assert(MBB == IdxMBB && "Adding phi-def to known live-out");
-          LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI));
-        } else {
-          // This phi-def is also live-out, so color the whole block.
+        I->Value = VNI;
+        // This block is done, we know the final value.
+        I->DomNode = 0;
+        if (I->Kill.isValid())
+          LI->addRange(LiveRange(Start, I->Kill, VNI));
+        else {
           LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
-          I->second = LiveOutPair(VNI, Node);
+          LOP = LiveOutPair(VNI, Node);
         }
       } else if (IDomValue.first) {
-        // No phi-def here. Remember incoming value for IdxMBB.
-        if (MBB == IdxMBB)
-          IdxVNI = IDomValue.first;
+        // No phi-def here. Remember incoming value.
+        I->Value = IDomValue.first;
+        if (I->Kill.isValid())
+          continue;
         // Propagate IDomValue if needed:
         // MBB is live-out and doesn't define its own value.
-        LiveOutMap::iterator I = LiveOutCache.find(MBB);
-        if (I != LiveOutCache.end() && I->second.second != Node &&
-            I->second.first != IDomValue.first) {
+        if (LOP.second != Node && LOP.first != IDomValue.first) {
           ++Changes;
-          I->second = IDomValue;
-          DEBUG(dbgs() << "    - BB#" << MBB->getNumber()
-                       << " idom valno #" << IDomValue.first->id
-                       << " from BB#" << IDom->getBlock()->getNumber() << '\n');
+          LOP = IDomValue;
         }
       }
     }
-    DEBUG(dbgs() << "  - made " << Changes << " changes.\n");
   } while (Changes);
 
-  assert(IdxVNI && "Didn't find value for Idx");
-
-#ifndef NDEBUG
-  // Check the LiveOutCache invariants.
-  for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
-         I != E; ++I) {
-    assert(I->first && "Null MBB entry in cache");
-    assert(I->second.first && "Null VNInfo in cache");
-    assert(I->second.second && "Null DomTreeNode in cache");
-    if (I->second.second->getBlock() == I->first)
-      continue;
-    for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
-           PE = I->first->pred_end(); PI != PE; ++PI)
-      assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant");
-  }
-#endif
-
-  // Since we went through the trouble of a full BFS visiting all reaching defs,
-  // the values in LiveIn are now accurate. No more phi-defs are needed
+  // The values in LiveInBlocks are now accurate. No more phi-defs are needed
   // for these blocks, so we can color the live ranges.
-  // This makes the next mapValue call much faster.
-  for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+  for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(),
+         E = LiveInBlocks.end(); I != E; ++I) {
+    if (!I->DomNode)
+      continue;
+    assert(I->Value && "No live-in value found");
+    MachineBasicBlock *MBB = I->DomNode->getBlock();
     SlotIndex Start = LIS.getMBBStartIdx(MBB);
-    VNInfo *VNI = LiveOutCache.lookup(MBB).first;
-
-    // Anything in LiveIn other than IdxMBB is live-through.
-    // In IdxMBB, we should stop at Idx unless the same value is live-out.
-    if (MBB == IdxMBB && IdxVNI != VNI)
-      LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI));
-    else
-      LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+    unsigned RegIdx = RegAssign.lookup(Start);
+    LiveInterval *LI = Edit->get(RegIdx);
+    LI->addRange(LiveRange(Start, I->Kill.isValid() ?
+                                  I->Kill : LIS.getMBBEndIdx(MBB), I->Value));
   }
-
-  return IdxVNI;
-}
-
-#ifndef NDEBUG
-void LiveIntervalMap::dumpCache() {
-  for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
-         I != E; ++I) {
-    assert(I->first && "Null MBB entry in cache");
-    assert(I->second.first && "Null VNInfo in cache");
-    assert(I->second.second && "Null DomTreeNode in cache");
-    dbgs() << "    cache: BB#" << I->first->getNumber()
-           << " has valno #" << I->second.first->id << " from BB#"
-           << I->second.second->getBlock()->getNumber() << ", preds";
-    for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
-           PE = I->first->pred_end(); PI != PE; ++PI)
-      dbgs() << " BB#" << (*PI)->getNumber();
-    dbgs() << '\n';
-  }
-  dbgs() << "    cache: " << LiveOutCache.size() << " entries.\n";
-}
-#endif
-
-// extendTo - Find the last LI value defined in MBB at or before Idx. The
-// ParentLI is assumed to be live at Idx. Extend the live range to Idx.
-// Return the found VNInfo, or NULL.
-VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) {
-  assert(LI && "call reset first");
-  LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx);
-  if (I == LI->begin())
-    return 0;
-  --I;
-  if (I->end <= LIS.getMBBStartIdx(MBB))
-    return 0;
-  if (I->end <= Idx)
-    I->end = Idx.getNextSlot();
-  return I->valno;
-}
-
-// addSimpleRange - Add a simple range from ParentLI to LI.
-// ParentVNI must be live in the [Start;End) interval.
-void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
-                                     const VNInfo *ParentVNI) {
-  assert(LI && "call reset first");
-  bool simple;
-  VNInfo *VNI = mapValue(ParentVNI, Start, &simple);
-  // A simple mapping is easy.
-  if (simple) {
-    LI->addRange(LiveRange(Start, End, VNI));
-    return;
-  }
-
-  // ParentVNI is a complex value. We must map per MBB.
-  MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
-  MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot());
-
-  if (MBB == MBBE) {
-    LI->addRange(LiveRange(Start, End, VNI));
-    return;
-  }
-
-  // First block.
-  LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
-
-  // Run sequence of full blocks.
-  for (++MBB; MBB != MBBE; ++MBB) {
-    Start = LIS.getMBBStartIdx(MBB);
-    LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB),
-                            mapValue(ParentVNI, Start)));
-  }
-
-  // Final block.
-  Start = LIS.getMBBStartIdx(MBB);
-  if (Start != End)
-    LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
-}
-
-/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
-/// All needed values whose def is not inside [Start;End) must be defined
-/// beforehand so mapValue will work.
-void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
-  assert(LI && "call reset first");
-  LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end();
-  LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
-
-  // Check if --I begins before Start and overlaps.
-  if (I != B) {
-    --I;
-    if (I->end > Start)
-      addSimpleRange(Start, std::min(End, I->end), I->valno);
-    ++I;
-  }
-
-  // The remaining ranges begin after Start.
-  for (;I != E && I->start < End; ++I)
-    addSimpleRange(I->start, std::min(End, I->end), I->valno);
-}
-
-
-//===----------------------------------------------------------------------===//
-//                               Split Editor
-//===----------------------------------------------------------------------===//
-
-/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa,
-                         LiveIntervals &lis,
-                         VirtRegMap &vrm,
-                         MachineDominatorTree &mdt,
-                         LiveRangeEdit &edit)
-  : SA(sa), LIS(lis), VRM(vrm),
-    MRI(vrm.getMachineFunction().getRegInfo()),
-    MDT(mdt),
-    TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
-    TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
-    Edit(edit),
-    OpenIdx(0),
-    RegAssign(Allocator)
-{
-  // We don't need an AliasAnalysis since we will only be performing
-  // cheap-as-a-copy remats anyway.
-  Edit.anyRematerializable(LIS, TII, 0);
-}
-
-void SplitEditor::dump() const {
-  if (RegAssign.empty()) {
-    dbgs() << " empty\n";
-    return;
-  }
-
-  for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
-    dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
-  dbgs() << '\n';
 }
 
 VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
@@ -596,51 +577,53 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
                                    MachineBasicBlock::iterator I) {
   MachineInstr *CopyMI = 0;
   SlotIndex Def;
-  LiveInterval *LI = Edit.get(RegIdx);
+  LiveInterval *LI = Edit->get(RegIdx);
+
+  // We may be trying to avoid interference that ends at a deleted instruction,
+  // so always begin RegIdx 0 early and all others late.
+  bool Late = RegIdx != 0;
 
   // Attempt cheap-as-a-copy rematerialization.
   LiveRangeEdit::Remat RM(ParentVNI);
-  if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) {
-    Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI);
+  if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
+    Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
   } else {
     // Can't remat, just insert a copy from parent.
     CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
-               .addReg(Edit.getReg());
-    Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex();
+               .addReg(Edit->getReg());
+    Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
+            .getDefIndex();
   }
 
   // Define the value in Reg.
-  VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def);
+  VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
   VNI->setCopy(CopyMI);
-
-  // Add minimal liveness for the new value.
-  Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
   return VNI;
 }
 
 /// Create a new virtual register and live interval.
-void SplitEditor::openIntv() {
-  assert(!OpenIdx && "Previous LI not closed before openIntv");
-
+unsigned SplitEditor::openIntv() {
   // Create the complement as index 0.
-  if (Edit.empty()) {
-    Edit.create(MRI, LIS, VRM);
-    LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
-    LIMappers.back().reset(Edit.get(0));
-  }
+  if (Edit->empty())
+    Edit->create(LIS, VRM);
 
   // Create the open interval.
-  OpenIdx = Edit.size();
-  Edit.create(MRI, LIS, VRM);
-  LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
-  LIMappers[OpenIdx].reset(Edit.get(OpenIdx));
+  OpenIdx = Edit->size();
+  Edit->create(LIS, VRM);
+  return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+  assert(Idx != 0 && "Cannot select the complement interval");
+  assert(Idx < Edit->size() && "Can only select previously opened interval");
+  OpenIdx = Idx;
 }
 
 SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
   assert(OpenIdx && "openIntv not called before enterIntvBefore");
   DEBUG(dbgs() << "    enterIntvBefore " << Idx);
   Idx = Idx.getBaseIndex();
-  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
     return Idx;
@@ -658,14 +641,14 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
   SlotIndex End = LIS.getMBBEndIdx(&MBB);
   SlotIndex Last = End.getPrevSlot();
   DEBUG(dbgs() << "    enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
-  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last);
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
     return End;
   }
   DEBUG(dbgs() << ": valno " << ParentVNI->id);
   VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
-                              LIS.getLastSplitPoint(Edit.getParent(), &MBB));
+                              LIS.getLastSplitPoint(Edit->getParent(), &MBB));
   RegAssign.insert(VNI->def, End, OpenIdx);
   DEBUG(dump());
   return VNI->def;
@@ -689,7 +672,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
 
   // The interval must be live beyond the instruction at Idx.
   Idx = Idx.getBoundaryIndex();
-  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
     return Idx.getNextSlot();
@@ -709,7 +692,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
 
   // The interval must be live into the instruction at Idx.
   Idx = Idx.getBoundaryIndex();
-  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
     return Idx.getNextSlot();
@@ -727,7 +710,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
   SlotIndex Start = LIS.getMBBStartIdx(&MBB);
   DEBUG(dbgs() << "    leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
 
-  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start);
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
     return Start;
@@ -742,30 +725,169 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
 
 void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
   assert(OpenIdx && "openIntv not called before overlapIntv");
-  assert(Edit.getParent().getVNInfoAt(Start) ==
-         Edit.getParent().getVNInfoAt(End.getPrevSlot()) &&
+  const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+  assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
          "Parent changes value in extended range");
-  assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*");
   assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
          "Range cannot span basic blocks");
 
-  // Treat this as useIntv() for now. The complement interval will be extended
-  // as needed by mapValue().
+  // The complement interval will be extended as needed by extendRange().
+  if (ParentVNI)
+    markComplexMapped(0, ParentVNI);
   DEBUG(dbgs() << "    overlapIntv [" << Start << ';' << End << "):");
   RegAssign.insert(Start, End, OpenIdx);
   DEBUG(dump());
 }
 
-/// closeIntv - Indicate that we are done editing the currently open
-/// LiveInterval, and ranges can be trimmed.
-void SplitEditor::closeIntv() {
-  assert(OpenIdx && "openIntv not called before closeIntv");
-  OpenIdx = 0;
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need extendRange().
+bool SplitEditor::transferValues() {
+  bool Skipped = false;
+  LiveInBlocks.clear();
+  RegAssignMap::const_iterator AssignI = RegAssign.begin();
+  for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(),
+         ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) {
+    DEBUG(dbgs() << "  blit " << *ParentI << ':');
+    VNInfo *ParentVNI = ParentI->valno;
+    // RegAssign has holes where RegIdx 0 should be used.
+    SlotIndex Start = ParentI->start;
+    AssignI.advanceTo(Start);
+    do {
+      unsigned RegIdx;
+      SlotIndex End = ParentI->end;
+      if (!AssignI.valid()) {
+        RegIdx = 0;
+      } else if (AssignI.start() <= Start) {
+        RegIdx = AssignI.value();
+        if (AssignI.stop() < End) {
+          End = AssignI.stop();
+          ++AssignI;
+        }
+      } else {
+        RegIdx = 0;
+        End = std::min(End, AssignI.start());
+      }
+
+      // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+      DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+      LiveInterval *LI = Edit->get(RegIdx);
+
+      // Check for a simply defined value that can be blitted directly.
+      if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) {
+        DEBUG(dbgs() << ':' << VNI->id);
+        LI->addRange(LiveRange(Start, End, VNI));
+        Start = End;
+        continue;
+      }
+
+      // Skip rematerialized values, we need to use extendRange() and
+      // extendPHIKillRanges() to completely recompute the live ranges.
+      if (Edit->didRematerialize(ParentVNI)) {
+        DEBUG(dbgs() << "(remat)");
+        Skipped = true;
+        Start = End;
+        continue;
+      }
+
+      // Initialize the live-out cache the first time it is needed.
+      if (LiveOutSeen.empty()) {
+        unsigned N = VRM.getMachineFunction().getNumBlockIDs();
+        LiveOutSeen.resize(N);
+        LiveOutCache.resize(N);
+      }
+
+      // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+      // so the live range is accurate. Add live-in blocks in [Start;End) to the
+      // LiveInBlocks.
+      MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+      SlotIndex BlockStart, BlockEnd;
+      tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+
+      // The first block may be live-in, or it may have its own def.
+      if (Start != BlockStart) {
+        VNInfo *VNI = LI->extendInBlock(BlockStart,
+                                        std::min(BlockEnd, End).getPrevSlot());
+        assert(VNI && "Missing def for complex mapped value");
+        DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+        // MBB has its own def. Is it also live-out?
+        if (BlockEnd <= End) {
+          LiveOutSeen.set(MBB->getNumber());
+          LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+        }
+        // Skip to the next block for live-in.
+        ++MBB;
+        BlockStart = BlockEnd;
+      }
+
+      // Handle the live-in blocks covered by [Start;End).
+      assert(Start <= BlockStart && "Expected live-in block");
+      while (BlockStart < End) {
+        DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+        BlockEnd = LIS.getMBBEndIdx(MBB);
+        if (BlockStart == ParentVNI->def) {
+          // This block has the def of a parent PHI, so it isn't live-in.
+          assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+          VNInfo *VNI = LI->extendInBlock(BlockStart,
+                                         std::min(BlockEnd, End).getPrevSlot());
+          assert(VNI && "Missing def for complex mapped parent PHI");
+          if (End >= BlockEnd) {
+            // Live-out as well.
+            LiveOutSeen.set(MBB->getNumber());
+            LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]);
+          }
+        } else {
+          // This block needs a live-in value.
+          LiveInBlocks.push_back(MDT[MBB]);
+          // The last block covered may not be live-out.
+          if (End < BlockEnd)
+            LiveInBlocks.back().Kill = End;
+          else {
+            // Live-out, but we need updateSSA to tell us the value.
+            LiveOutSeen.set(MBB->getNumber());
+            LiveOutCache[MBB] = LiveOutPair((VNInfo*)0,
+                                            (MachineDomTreeNode*)0);
+          }
+        }
+        BlockStart = BlockEnd;
+        ++MBB;
+      }
+      Start = End;
+    } while (Start != ParentI->end);
+    DEBUG(dbgs() << '\n');
+  }
+
+  if (!LiveInBlocks.empty())
+    updateSSA();
+
+  return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+    // Extend live ranges to be live-out for successor PHI values.
+  for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+       E = Edit->getParent().vni_end(); I != E; ++I) {
+    const VNInfo *PHIVNI = *I;
+    if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+      continue;
+    unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+      // The predecessor may not have a live-out value. That is OK, like an
+      // undef PHI operand.
+      if (Edit->getParent().liveAt(End)) {
+        assert(RegAssign.lookup(End) == RegIdx &&
+               "Different register assignment in phi predecessor");
+        extendRange(RegIdx, End);
+      }
+    }
+  }
 }
 
-/// rewriteAssigned - Rewrite all uses of Edit.getReg().
-void SplitEditor::rewriteAssigned() {
-  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()),
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
        RE = MRI.reg_end(); RI != RE;) {
     MachineOperand &MO = RI.getOperand();
     MachineInstr *MI = MO.getParent();
@@ -780,147 +902,145 @@ void SplitEditor::rewriteAssigned() {
     // <undef> operands don't really read the register, so just assign them to
     // the complement.
     if (MO.isUse() && MO.isUndef()) {
-      MO.setReg(Edit.get(0)->reg);
+      MO.setReg(Edit->get(0)->reg);
       continue;
     }
 
     SlotIndex Idx = LIS.getInstructionIndex(MI);
-    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+    if (MO.isDef())
+      Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
 
     // Rewrite to the mapped register at Idx.
     unsigned RegIdx = RegAssign.lookup(Idx);
-    MO.setReg(Edit.get(RegIdx)->reg);
+    MO.setReg(Edit->get(RegIdx)->reg);
     DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
                  << Idx << ':' << RegIdx << '\t' << *MI);
 
-    // Extend liveness to Idx.
-    const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
-    LIMappers[RegIdx].mapValue(ParentVNI, Idx);
+    // Extend liveness to Idx if the instruction reads reg.
+    if (!ExtendRanges)
+      continue;
+
+    // Skip instructions that don't read Reg.
+    if (MO.isDef()) {
+      if (!MO.getSubReg() && !MO.isEarlyClobber())
+        continue;
+      // We may wan't to extend a live range for a partial redef, or for a use
+      // tied to an early clobber.
+      Idx = Idx.getPrevSlot();
+      if (!Edit->getParent().liveAt(Idx))
+        continue;
+    } else
+      Idx = Idx.getUseIndex();
+
+    extendRange(RegIdx, Idx);
   }
 }
 
-/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping.
-void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
-                                    const ConnectedVNInfoEqClasses &ConEq) {
-  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
-       RE = MRI.reg_end(); RI != RE;) {
-    MachineOperand &MO = RI.getOperand();
-    MachineInstr *MI = MO.getParent();
-    ++RI;
-    if (MO.isUse() && MO.isUndef())
-      continue;
-    // DBG_VALUE instructions should have been eliminated earlier.
-    SlotIndex Idx = LIS.getInstructionIndex(MI);
-    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
-    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
-                 << Idx << ':');
-    const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
-    assert(VNI && "Interval not live at use.");
-    MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
-    DEBUG(dbgs() << VNI->id << '\t' << *MI);
+void SplitEditor::deleteRematVictims() {
+  SmallVector<MachineInstr*, 8> Dead;
+  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+    LiveInterval *LI = *I;
+    for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+           LII != LIE; ++LII) {
+      // Dead defs end at the store slot.
+      if (LII->end != LII->valno->def.getNextSlot())
+        continue;
+      MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
+      assert(MI && "Missing instruction for dead def");
+      MI->addRegisterDead(LI->reg, &TRI);
+
+      if (!MI->allDefsAreDead())
+        continue;
+
+      DEBUG(dbgs() << "All defs dead: " << *MI);
+      Dead.push_back(MI);
+    }
   }
+
+  if (Dead.empty())
+    return;
+
+  Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
 }
 
-void SplitEditor::finish() {
-  assert(OpenIdx == 0 && "Previous LI not closed before rewrite");
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+  ++NumFinished;
 
   // At this point, the live intervals in Edit contain VNInfos corresponding to
   // the inserted copies.
 
   // Add the original defs from the parent interval.
-  for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
-         E = Edit.getParent().vni_end(); I != E; ++I) {
+  for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+         E = Edit->getParent().vni_end(); I != E; ++I) {
     const VNInfo *ParentVNI = *I;
     if (ParentVNI->isUnused())
       continue;
-    LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)];
-    VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def);
-    LIM.getLI()->addRange(LiveRange(ParentVNI->def,
-                                    ParentVNI->def.getNextSlot(), VNI));
-    // Mark all values as complex to force liveness computation.
-    // This should really only be necessary for remat victims, but we are lazy.
-    LIM.markComplexMapped(ParentVNI);
+    unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+    VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
+    VNI->setIsPHIDef(ParentVNI->isPHIDef());
+    VNI->setCopy(ParentVNI->getCopy());
+
+    // Mark rematted values as complex everywhere to force liveness computation.
+    // The new live ranges may be truncated.
+    if (Edit->didRematerialize(ParentVNI))
+      for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+        markComplexMapped(i, ParentVNI);
   }
 
 #ifndef NDEBUG
   // Every new interval must have a def by now, otherwise the split is bogus.
-  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
     assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
 #endif
 
-  // FIXME: Don't recompute the liveness of all values, infer it from the
-  // overlaps between the parent live interval and RegAssign.
-  // The mapValue algorithm is only necessary when:
-  // - The parent value maps to multiple defs, and new phis are needed, or
-  // - The value has been rematerialized before some uses, and we want to
-  //   minimize the live range so it only reaches the remaining uses.
-  // All other values have simple liveness that can be computed from RegAssign
-  // and the parent live interval.
-
-  // Extend live ranges to be live-out for successor PHI values.
-  for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
-       E = Edit.getParent().vni_end(); I != E; ++I) {
-    const VNInfo *PHIVNI = *I;
-    if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
-      continue;
-    unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
-    LiveIntervalMap &LIM = LIMappers[RegIdx];
-    MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
-    DEBUG(dbgs() << "  map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def
-                 << " -> " << RegIdx << '\n');
-    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
-         PE = MBB->pred_end(); PI != PE; ++PI) {
-      SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
-      DEBUG(dbgs() << "    pred BB#" << (*PI)->getNumber() << '@' << End);
-      // The predecessor may not have a live-out value. That is OK, like an
-      // undef PHI operand.
-      if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) {
-        DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n");
-        assert(RegAssign.lookup(End) == RegIdx &&
-               "Different register assignment in phi predecessor");
-        LIM.mapValue(VNI, End);
-      }
-      else
-        DEBUG(dbgs() << " is not live-out\n");
-    }
-    DEBUG(dbgs() << "    " << *LIM.getLI() << '\n');
-  }
+  // Transfer the simply mapped values, check if any are skipped.
+  bool Skipped = transferValues();
+  if (Skipped)
+    extendPHIKillRanges();
+  else
+    ++NumSimple;
 
-  // Rewrite instructions.
-  rewriteAssigned();
+  // Rewrite virtual registers, possibly extending ranges.
+  rewriteAssigned(Skipped);
 
-  // FIXME: Delete defs that were rematted everywhere.
+  // Delete defs that were rematted everywhere.
+  if (Skipped)
+    deleteRematVictims();
 
   // Get rid of unused values and set phi-kill flags.
-  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
     (*I)->RenumberValues(LIS);
 
+  // Provide a reverse mapping from original indices to Edit ranges.
+  if (LRMap) {
+    LRMap->clear();
+    for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+      LRMap->push_back(i);
+  }
+
   // Now check if any registers were separated into multiple components.
   ConnectedVNInfoEqClasses ConEQ(LIS);
-  for (unsigned i = 0, e = Edit.size(); i != e; ++i) {
+  for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
     // Don't use iterators, they are invalidated by create() below.
-    LiveInterval *li = Edit.get(i);
+    LiveInterval *li = Edit->get(i);
     unsigned NumComp = ConEQ.Classify(li);
     if (NumComp <= 1)
       continue;
     DEBUG(dbgs() << "  " << NumComp << " components: " << *li << '\n');
     SmallVector<LiveInterval*, 8> dups;
     dups.push_back(li);
-    for (unsigned i = 1; i != NumComp; ++i)
-      dups.push_back(&Edit.create(MRI, LIS, VRM));
-    rewriteComponents(dups, ConEQ);
-    ConEQ.Distribute(&dups[0]);
+    for (unsigned j = 1; j != NumComp; ++j)
+      dups.push_back(&Edit->create(LIS, VRM));
+    ConEQ.Distribute(&dups[0], MRI);
+    // The new intervals all map back to i.
+    if (LRMap)
+      LRMap->resize(Edit->size(), i);
   }
 
   // Calculate spill weight and allocation hints for new intervals.
-  VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops);
-  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){
-    LiveInterval &li = **I;
-    vrai.CalculateRegClass(li.reg);
-    vrai.CalculateWeightAndHint(li);
-    DEBUG(dbgs() << "  new interval " << MRI.getRegClass(li.reg)->getName()
-                 << ":" << li << '\n');
-  }
+  Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+
+  assert(!LRMap || LRMap->size() == Edit->size());
 }
 
 
@@ -932,113 +1052,42 @@ void SplitEditor::finish() {
 /// may be an advantage to split CurLI for the duration of the block.
 bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
   // If CurLI is local to one block, there is no point to splitting it.
-  if (LiveBlocks.size() <= 1)
+  if (UseBlocks.size() <= 1)
     return false;
   // Add blocks with multiple uses.
-  for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
-    const BlockInfo &BI = LiveBlocks[i];
-    if (!BI.Uses)
-      continue;
-    unsigned Instrs = UsingBlocks.lookup(BI.MBB);
-    if (Instrs <= 1)
-      continue;
-    if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+  for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
+    const BlockInfo &BI = UseBlocks[i];
+    if (BI.FirstUse == BI.LastUse)
       continue;
     Blocks.insert(BI.MBB);
   }
   return !Blocks.empty();
 }
 
-/// splitSingleBlocks - Split CurLI into a separate live interval inside each
-/// basic block in Blocks.
-void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
-  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
-
-  for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
-    const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
-    if (!BI.Uses || !Blocks.count(BI.MBB))
-      continue;
-
-    openIntv();
-    SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
-    if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) {
-      useIntv(SegStart, leaveIntvAfter(BI.LastUse));
-    } else {
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+  openIntv();
+  SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+  SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse,
+    LastSplitPoint));
+  if (!BI.LiveOut || BI.LastUse < LastSplitPoint) {
+    useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+  } else {
       // The last use is after the last valid split point.
-      SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
-      useIntv(SegStart, SegStop);
-      overlapIntv(SegStop, BI.LastUse);
-    }
-    closeIntv();
+    SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+    useIntv(SegStart, SegStop);
+    overlapIntv(SegStop, BI.LastUse);
   }
-  finish();
 }
 
-
-//===----------------------------------------------------------------------===//
-//                            Sub Block Splitting
-//===----------------------------------------------------------------------===//
-
-/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
-/// and it wou pay to subdivide the interval inside that block, return it.
-/// Otherwise return NULL. The returned block can be passed to
-/// SplitEditor::splitInsideBlock.
-const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
-  // The interval must be exclusive to one block.
-  if (UsingBlocks.size() != 1)
-    return 0;
-  // Don't to this for less than 4 instructions. We want to be sure that
-  // splitting actually reduces the instruction count per interval.
-  if (UsingInstrs.size() < 4)
-    return 0;
-  return UsingBlocks.begin()->first;
-}
-
-/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
-void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
-  SmallVector<SlotIndex, 32> Uses;
-  Uses.reserve(SA.UsingInstrs.size());
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
-       E = SA.UsingInstrs.end(); I != E; ++I)
-    if ((*I)->getParent() == MBB)
-      Uses.push_back(LIS.getInstructionIndex(*I));
-  DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
-               << Uses.size() << " instructions.\n");
-  assert(Uses.size() >= 3 && "Need at least 3 instructions");
-  array_pod_sort(Uses.begin(), Uses.end());
-
-  // Simple algorithm: Find the largest gap between uses as determined by slot
-  // indices. Create new intervals for instructions before the gap and after the
-  // gap.
-  unsigned bestPos = 0;
-  int bestGap = 0;
-  DEBUG(dbgs() << "    dist (" << Uses[0]);
-  for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
-    int g = Uses[i-1].distance(Uses[i]);
-    DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
-    if (g > bestGap)
-      bestPos = i, bestGap = g;
-  }
-  DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
-
-  // bestPos points to the first use after the best gap.
-  assert(bestPos > 0 && "Invalid gap");
-
-  // FIXME: Don't create intervals for low densities.
-
-  // First interval before the gap. Don't create single-instr intervals.
-  if (bestPos > 1) {
-    openIntv();
-    useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
-    closeIntv();
-  }
-
-  // Second interval after the gap.
-  if (bestPos < Uses.size()-1) {
-    openIntv();
-    useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back()));
-    closeIntv();
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    if (Blocks.count(BI.MBB))
+      splitSingleBlock(BI);
   }
-
   finish();
 }
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index e02e629..2ae760a 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -12,7 +12,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/SlotIndexes.h"
@@ -48,17 +54,9 @@ public:
   const MachineLoopInfo &Loops;
   const TargetInstrInfo &TII;
 
-  // Instructions using the the current register.
-  typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
-  InstrPtrSet UsingInstrs;
-
   // Sorted slot indexes of using instructions.
   SmallVector<SlotIndex, 8> UseSlots;
 
-  // The number of instructions using CurLI in each basic block.
-  typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
-  BlockCountMap UsingBlocks;
-
   /// Additional information about basic blocks where the current variable is
   /// live. Such a block will look like one of these templates:
   ///
@@ -75,35 +73,37 @@ public:
     SlotIndex LastUse;    ///< Last instr using current reg.
     SlotIndex Kill;       ///< Interval end point inside block.
     SlotIndex Def;        ///< Interval start point inside block.
-    /// Last possible point for splitting live ranges.
-    SlotIndex LastSplitPoint;
-    bool Uses;            ///< Current reg has uses or defs in block.
     bool LiveThrough;     ///< Live in whole block (Templ 5. or 6. above).
     bool LiveIn;          ///< Current reg is live in.
     bool LiveOut;         ///< Current reg is live out.
-
-    // Per-interference pattern scratch data.
-    bool OverlapEntry;    ///< Interference overlaps entering interval.
-    bool OverlapExit;     ///< Interference overlaps exiting interval.
   };
 
-  /// Basic blocks where var is live. This array is parallel to
-  /// SpillConstraints.
-  SmallVector<BlockInfo, 8> LiveBlocks;
-
 private:
   // Current live interval.
   const LiveInterval *CurLI;
 
+  /// LastSplitPoint - Last legal split point in each basic block in the current
+  /// function. The first entry is the first terminator, the second entry is the
+  /// last valid split point for a variable that is live in to a landing pad
+  /// successor.
+  SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+  /// UseBlocks - Blocks where CurLI has uses.
+  SmallVector<BlockInfo, 8> UseBlocks;
+
+  /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+  BitVector ThroughBlocks;
+
+  /// NumThroughBlocks - Number of live-through blocks.
+  unsigned NumThroughBlocks;
+
+  SlotIndex computeLastSplitPoint(unsigned Num);
+
   // Sumarize statistics by counting instructions using CurLI.
   void analyzeUses();
 
   /// calcLiveBlockInfo - Compute per-block information about CurLI.
-  void calcLiveBlockInfo();
-
-  /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
-  /// analyzed.
-  bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+  bool calcLiveBlockInfo();
 
 public:
   SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
@@ -120,9 +120,14 @@ public:
   /// getParent - Return the last analyzed interval.
   const LiveInterval &getParent() const { return *CurLI; }
 
-  /// hasUses - Return true if MBB has any uses of CurLI.
-  bool hasUses(const MachineBasicBlock *MBB) const {
-    return UsingBlocks.lookup(MBB);
+  /// getLastSplitPoint - Return that base index of the last valid split point
+  /// in the basic block numbered Num.
+  SlotIndex getLastSplitPoint(unsigned Num) {
+    // Inline the common simple case.
+    if (LastSplitPoint[Num].first.isValid() &&
+        !LastSplitPoint[Num].second.isValid())
+      return LastSplitPoint[Num].first;
+    return computeLastSplitPoint(Num);
   }
 
   /// isOriginalEndpoint - Return true if the original live range was killed or
@@ -132,127 +137,30 @@ public:
   /// splitting.
   bool isOriginalEndpoint(SlotIndex Idx) const;
 
-  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
-
-  // Print a set of blocks with use counts.
-  void print(const BlockPtrSet&, raw_ostream&) const;
-
-  /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
-  /// having CurLI split to a new live interval. Return true if Blocks can be
-  /// passed to SplitEditor::splitSingleBlocks.
-  bool getMultiUseBlocks(BlockPtrSet &Blocks);
-
-  /// getBlockForInsideSplit - If CurLI is contained inside a single basic
-  /// block, and it would pay to subdivide the interval inside that block,
-  /// return it. Otherwise return NULL. The returned block can be passed to
-  /// SplitEditor::splitInsideBlock.
-  const MachineBasicBlock *getBlockForInsideSplit();
-};
-
+  /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+  /// where CurLI has uses.
+  ArrayRef<BlockInfo> getUseBlocks() { return UseBlocks; }
 
-/// LiveIntervalMap - Map values from a large LiveInterval into a small
-/// interval that is a subset. Insert phi-def values as needed. This class is
-/// used by SplitEditor to create new smaller LiveIntervals.
-///
-/// ParentLI is the larger interval, LI is the subset interval. Every value
-/// in LI corresponds to exactly one value in ParentLI, and the live range
-/// of the value is contained within the live range of the ParentLI value.
-/// Values in ParentLI may map to any number of OpenLI values, including 0.
-class LiveIntervalMap {
-  LiveIntervals &LIS;
-  MachineDominatorTree &MDT;
+  /// getNumThroughBlocks - Return the number of through blocks.
+  unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
 
-  // The parent interval is never changed.
-  const LiveInterval &ParentLI;
+  /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+  bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
 
-  // The child interval's values are fully contained inside ParentLI values.
-  LiveInterval *LI;
+  /// getThroughBlocks - Return the set of through blocks.
+  const BitVector &getThroughBlocks() const { return ThroughBlocks; }
 
-  typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+  /// countLiveBlocks - Return the number of blocks where li is live.
+  /// This is guaranteed to return the same number as getNumThroughBlocks() +
+  /// getUseBlocks().size() after calling analyze(li).
+  unsigned countLiveBlocks(const LiveInterval *li) const;
 
-  // Map ParentLI values to simple values in LI that are defined at the same
-  // SlotIndex, or NULL for ParentLI values that have complex LI defs.
-  // Note there is a difference between values mapping to NULL (complex), and
-  // values not present (unknown/unmapped).
-  ValueMap Values;
-
-  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
-  typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap;
-
-  // LiveOutCache - Map each basic block where LI is live out to the live-out
-  // value and its defining block. One of these conditions shall be true:
-  //
-  //  1. !LiveOutCache.count(MBB)
-  //  2. LiveOutCache[MBB].second.getNode() == MBB
-  //  3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
-  //
-  // This is only a cache, the values can be computed as:
-  //
-  //  VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB))
-  //  Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
-  //
-  // The cache is also used as a visiteed set by mapValue().
-  LiveOutMap LiveOutCache;
-
-  // Dump the live-out cache to dbgs().
-  void dumpCache();
-
-public:
-  LiveIntervalMap(LiveIntervals &lis,
-                  MachineDominatorTree &mdt,
-                  const LiveInterval &parentli)
-    : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {}
-
-  /// reset - clear all data structures and start a new live interval.
-  void reset(LiveInterval *);
-
-  /// getLI - return the current live interval.
-  LiveInterval *getLI() const { return LI; }
-
-  /// defValue - define a value in LI from the ParentLI value VNI and Idx.
-  /// Idx does not have to be ParentVNI->def, but it must be contained within
-  /// ParentVNI's live range in ParentLI.
-  /// Return the new LI value.
-  VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
-
-  /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is
-  /// assumed that ParentVNI is live at Idx.
-  /// If ParentVNI has not been defined by defValue, it is assumed that
-  /// ParentVNI->def dominates Idx.
-  /// If ParentVNI has been defined by defValue one or more times, a value that
-  /// dominates Idx will be returned. This may require creating extra phi-def
-  /// values and adding live ranges to LI.
-  /// If simple is not NULL, *simple will indicate if ParentVNI is a simply
-  /// mapped value.
-  VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0);
-
-  // extendTo - Find the last LI value defined in MBB at or before Idx. The
-  // parentli is assumed to be live at Idx. Extend the live range to include
-  // Idx. Return the found VNInfo, or NULL.
-  VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx);
-
-  /// isMapped - Return true is ParentVNI is a known mapped value. It may be a
-  /// simple 1-1 mapping or a complex mapping to later defs.
-  bool isMapped(const VNInfo *ParentVNI) const {
-    return Values.count(ParentVNI);
-  }
-
-  /// isComplexMapped - Return true if ParentVNI has received new definitions
-  /// with defValue.
-  bool isComplexMapped(const VNInfo *ParentVNI) const;
-
-  /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the
-  /// number of definitions.
-  void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; }
-
-  // addSimpleRange - Add a simple range from ParentLI to LI.
-  // ParentVNI must be live in the [Start;End) interval.
-  void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
 
-  /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
-  /// All needed values whose def is not inside [Start;End) must be defined
-  /// beforehand so mapValue will work.
-  void addRange(SlotIndex Start, SlotIndex End);
+  /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+  /// having CurLI split to a new live interval. Return true if Blocks can be
+  /// passed to SplitEditor::splitSingleBlocks.
+  bool getMultiUseBlocks(BlockPtrSet &Blocks);
 };
 
 
@@ -277,7 +185,7 @@ class SplitEditor {
   const TargetRegisterInfo &TRI;
 
   /// Edit - The current parent register and new intervals created.
-  LiveRangeEdit &Edit;
+  LiveRangeEdit *Edit;
 
   /// Index into Edit of the currently open interval.
   /// The index 0 is used for the complement, so the first interval started by
@@ -295,8 +203,76 @@ class SplitEditor {
   /// Idx.
   RegAssignMap RegAssign;
 
-  /// LIMappers - One LiveIntervalMap or each interval in Edit.
-  SmallVector<LiveIntervalMap, 4> LIMappers;
+  typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+
+  /// Values - keep track of the mapping from parent values to values in the new
+  /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+  ///
+  /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+  /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
+  ///    Each value is represented by a minimal live range at its def.
+  /// 3. A non-null VNInfo - the value is mapped to a single new value.
+  ///    The new value has no live ranges anywhere.
+  ValueMap Values;
+
+  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+  typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+  // LiveOutCache - Map each basic block where a new register is live out to the
+  // live-out value and its defining block.
+  // One of these conditions shall be true:
+  //
+  //  1. !LiveOutCache.count(MBB)
+  //  2. LiveOutCache[MBB].second.getNode() == MBB
+  //  3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+  //
+  // This is only a cache, the values can be computed as:
+  //
+  //  VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+  //  Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
+  //
+  // The cache is also used as a visited set by extendRange(). It can be shared
+  // by all the new registers because at most one is live out of each block.
+  LiveOutMap LiveOutCache;
+
+  // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
+  // entry in LiveOutCache.
+  BitVector LiveOutSeen;
+
+  /// LiveInBlock - Info for updateSSA() about a block where a register is
+  /// live-in.
+  /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA()
+  /// adds the computed live-in value.
+  struct LiveInBlock {
+    // Dominator tree node for the block.
+    // Cleared by updateSSA when the final value has been determined.
+    MachineDomTreeNode *DomNode;
+
+    // Live-in value filled in by updateSSA once it is known.
+    VNInfo *Value;
+
+    // Position in block where the live-in range ends, or SlotIndex() if the
+    // range passes through the block.
+    SlotIndex Kill;
+
+    LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {}
+  };
+
+  /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and
+  /// updateSSA(). This list is usually empty, it exists here to avoid frequent
+  /// reallocations.
+  SmallVector<LiveInBlock, 16> LiveInBlocks;
+
+  /// defValue - define a value in RegIdx from ParentVNI at Idx.
+  /// Idx does not have to be ParentVNI->def, but it must be contained within
+  /// ParentVNI's live range in ParentLI. The new value is added to the value
+  /// map.
+  /// Return the new LI value.
+  VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
+  /// of the number of defs.
+  void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
 
   /// defFromParent - Define Reg from ParentVNI at UseIdx using either
   /// rematerialization or a COPY from parent. Return the new value.
@@ -306,27 +282,56 @@ class SplitEditor {
                         MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I);
 
+  /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
+  /// Insert PHIDefs as needed to preserve SSA form.
+  void extendRange(unsigned RegIdx, SlotIndex Idx);
+
+  /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all
+  /// reaching defs for LI are found.
+  /// @param LI   Live interval whose value is needed.
+  /// @param MBB  Block where LI should be live-in.
+  /// @param Kill Kill point in MBB.
+  /// @return Unique value seen, or NULL.
+  VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB,
+                           SlotIndex Kill);
+
+  /// updateSSA - Compute and insert PHIDefs such that all blocks in
+  // LiveInBlocks get a known live-in value. Add live ranges to the blocks.
+  void updateSSA();
+
+  /// transferValues - Transfer values to the new ranges.
+  /// Return true if any ranges were skipped.
+  bool transferValues();
+
+  /// extendPHIKillRanges - Extend the ranges of all values killed by original
+  /// parent PHIDefs.
+  void extendPHIKillRanges();
+
   /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
-  void rewriteAssigned();
+  void rewriteAssigned(bool ExtendRanges);
 
-  /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq
-  /// classes in ConEQ.
-  /// This must be done when Intvs[0] is styill live at all uses, before calling
-  /// ConEq.Distribute().
-  void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
-                         const ConnectedVNInfoEqClasses &ConEq);
+  /// deleteRematVictims - Delete defs that are dead after rematerializing.
+  void deleteRematVictims();
 
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
   SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
-              MachineDominatorTree&, LiveRangeEdit&);
+              MachineDominatorTree&);
 
-  /// getAnalysis - Get the corresponding analysis.
-  SplitAnalysis &getAnalysis() { return SA; }
+  /// reset - Prepare for a new split.
+  void reset(LiveRangeEdit&);
 
   /// Create a new virtual register and live interval.
-  void openIntv();
+  /// Return the interval index, starting from 1. Interval index 0 is the
+  /// implicit complement interval.
+  unsigned openIntv();
+
+  /// currentIntv - Return the current interval index.
+  unsigned currentIntv() const { return OpenIdx; }
+
+  /// selectIntv - Select a previously opened interval index.
+  void selectIntv(unsigned Idx);
 
   /// enterIntvBefore - Enter the open interval before the instruction at Idx.
   /// If the parent interval is not live before Idx, a COPY is not inserted.
@@ -369,25 +374,28 @@ public:
   ///
   void overlapIntv(SlotIndex Start, SlotIndex End);
 
-  /// closeIntv - Indicate that we are done editing the currently open
-  /// LiveInterval, and ranges can be trimmed.
-  void closeIntv();
-
   /// finish - after all the new live ranges have been created, compute the
   /// remaining live range, and rewrite instructions to use the new registers.
-  void finish();
+  /// @param LRMap When not null, this vector will map each live range in Edit
+  ///              back to the indices returned by openIntv.
+  ///              There may be extra indices created by dead code elimination.
+  void finish(SmallVectorImpl<unsigned> *LRMap = 0);
 
   /// dump - print the current interval maping to dbgs().
   void dump() const;
 
   // ===--- High level methods ---===
 
+  /// splitSingleBlock - Split CurLI into a separate live interval around the
+  /// uses in a single block. This is intended to be used as part of a larger
+  /// split, and doesn't call finish().
+  void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
   /// splitSingleBlocks - Split CurLI into a separate live interval inside each
   /// basic block in Blocks.
   void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
-
-  /// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
-  void splitInsideBlock(const MachineBasicBlock *);
 };
 
 }
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index fcaee42..f0a44ab 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -153,7 +153,6 @@ bool StackProtector::InsertStackProtectors() {
 
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
     BasicBlock *BB = I++;
-
     ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
     if (!RI) continue;
 
@@ -191,8 +190,6 @@ bool StackProtector::InsertStackProtectors() {
 
       // Create the basic block to jump to when the guard check fails.
       FailBB = CreateFailBB();
-      if (DT)
-        FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
     }
 
     // For each block with a return instruction, convert this:
@@ -219,9 +216,10 @@ bool StackProtector::InsertStackProtectors() {
 
     // Split the basic block before the return instruction.
     BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
-    if (DT) {
-      DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
-      FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+
+    if (DT && DT->isReachableFromEntry(BB)) {
+      DT->addNewBlock(NewBB, BB);
+      FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
     }
 
     // Remove default branch instruction to the new BB.
@@ -242,7 +240,7 @@ bool StackProtector::InsertStackProtectors() {
   // statements in the function.
   if (!FailBB) return false;
 
-  if (DT)
+  if (DT && FailBBDom)
     DT->addNewBlock(FailBB, FailBBDom);
 
   return true;
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index ec7829e..227eb47 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock(
   }
 
   // We now walk the PHIs in successor blocks and check for interferences. This
-  // is necesary because the use of a PHI's operands are logically contained in
+  // is necessary because the use of a PHI's operands are logically contained in
   // the predecessor block. The def of a PHI's destination register is processed
   // along with the other defs in a basic block.
 
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 15340a3..b9fcd38 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -388,11 +388,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
     if (MO.isDef() != (i == 0))
       return false;
 
-    // For the def, it should be the only def of that register.
-    if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
-                       MRI.isLiveIn(Reg)))
-      return false;
-
     // Don't allow any virtual-register uses. Rematting an instruction with
     // virtual register uses would length the live ranges of the uses, which
     // is not necessarily a good idea, certainly not "trivial".
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index fa311dc..6ed91b0 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
@@ -176,6 +177,52 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
                                     SectionKind::getDataRel());
 }
 
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+                                                     Mangler *Mang,
+                                                MachineModuleInfo *MMI) const {
+  unsigned Encoding = getPersonalityEncoding();
+  switch (Encoding & 0x70) {
+  default:
+    report_fatal_error("We do not support this DWARF encoding yet!");
+  case dwarf::DW_EH_PE_absptr:
+    return  Mang->getSymbol(GV);
+    break;
+  case dwarf::DW_EH_PE_pcrel: {
+    Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName();
+    return getContext().GetOrCreateSymbol(FullName);
+    break;
+  }
+  }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+                                                       const TargetMachine &TM,
+                                                       const MCSymbol *Sym) const {
+  Twine FullName = StringRef("DW.ref.") + Sym->getName();
+  MCSymbol *Label = getContext().GetOrCreateSymbol(FullName);
+  Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+  Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+  Twine SectionName = StringRef(".data.") + Label->getName();
+  SmallString<64> NameData;
+  SectionName.toVector(NameData);
+  unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+  const MCSection *Sec = getContext().getELFSection(NameData,
+                                                    ELF::SHT_PROGBITS,
+                                                    Flags,
+                                                    SectionKind::getDataRel(),
+                                                    0, Label->getName());
+  Streamer.SwitchSection(Sec);
+  Streamer.EmitValueToAlignment(8);
+  Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+  const MCExpr *E = MCConstantExpr::Create(8, getContext());
+  Streamer.EmitELFSize(Label, E);
+  Streamer.EmitLabel(Label);
+
+  unsigned Size = TM.getTargetData()->getPointerSize();
+  Streamer.EmitSymbolValue(Sym, Size);
+}
+
 static SectionKind
 getELFKindForNamedSection(StringRef Name, SectionKind K) {
   // FIXME: Why is this here? Codegen is should not be in the business
@@ -424,8 +471,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
     }
 
     return TargetLoweringObjectFile::
-      getExprForDwarfReference(SSym, Mang, MMI,
-                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+      getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
@@ -446,18 +492,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   IsFunctionEHFrameSymbolPrivate = false;
   SupportsWeakOmittedEHFrame = false;
 
+  // .comm doesn't support alignment before Leopard.
   Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
-  if (T.getOS() == Triple::Darwin) {
-    switch (T.getDarwinMajorNumber()) {
-    case 7:  // 10.3 Panther.
-    case 8:  // 10.4 Tiger.
-      CommDirectiveSupportsAlignment = false;
-      break;
-    case 9:   // 10.5 Leopard.
-    case 10:  // 10.6 SnowLeopard.
-      break;
-    }
-  }
+  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+    CommDirectiveSupportsAlignment = false;
 
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
@@ -641,10 +679,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const {
   // Parse the section specifier and create it if valid.
   StringRef Segment, Section;
-  unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0;
+  unsigned TAA = 0, StubSize = 0;
+  bool TAAParsed;
   std::string ErrorCode =
     MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
-                                          TAA, StubSize);
+                                          TAA, TAAParsed, StubSize);
   if (!ErrorCode.empty()) {
     // If invalid, report the error with report_fatal_error.
     report_fatal_error("Global variable '" + GV->getNameStr() +
@@ -654,17 +693,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
     return DataSection;
   }
 
-  bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES);
-  if (!TAAWasSet)
-    TAA = 0;      // Sensible default if this is a new section.
-    
   // Get the section.
   const MCSectionMachO *S =
     getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
 
   // If TAA wasn't set by ParseSectionSpecifier() above,
   // use the value returned by getMachOSection() as a default.
-  if (!TAAWasSet)
+  if (!TAAParsed)
     TAA = S->getTypeAndAttributes();
 
   // Okay, now that we got the section, verify that the TAA & StubSize agree.
@@ -806,14 +841,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
     }
 
     return TargetLoweringObjectFile::
-      getExprForDwarfReference(SSym, Mang, MMI,
-                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+      getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
     getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                        MachineModuleInfo *MMI) const {
+  // The mach-o version of this method defaults to returning a stub reference.
+  MachineModuleInfoMachO &MachOMMI =
+    MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+  SmallString<128> Name;
+  Mang->getNameWithPrefix(Name, GV, true);
+  Name += "$non_lazy_ptr";
+
+  // Add information about the stub reference to MachOMMI so that the stub
+  // gets emitted by the asmprinter.
+  MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+  MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+  if (StubSym.getPointer() == 0) {
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+  }
+
+  return SSym;
+}
+
 unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
   return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
 }
@@ -822,7 +879,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
   return DW_EH_PE_pcrel;
 }
 
-unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const {
   return DW_EH_PE_pcrel;
 }
 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index b3120b8..52ea872 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -105,7 +105,7 @@ namespace {
                             MachineFunction::iterator &mbbi,
                             unsigned RegB, unsigned RegC, unsigned Dist);
 
-    bool isProfitableToConv3Addr(unsigned RegA);
+    bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
 
     bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
                             MachineBasicBlock::iterator &nmi,
@@ -124,7 +124,11 @@ namespace {
                                  MachineBasicBlock::iterator &nmi,
                                  MachineFunction::iterator &mbbi,
                                  unsigned SrcIdx, unsigned DstIdx,
-                                 unsigned Dist);
+                                 unsigned Dist,
+                                 SmallPtrSet<MachineInstr*, 8> &Processed);
+
+    void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+                  SmallPtrSet<MachineInstr*, 8> &Processed);
 
     void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
                      SmallPtrSet<MachineInstr*, 8> &Processed);
@@ -615,16 +619,18 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
 /// isProfitableToConv3Addr - Return true if it is profitable to convert the
 /// given 2-address instruction to a 3-address one.
 bool
-TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
   // Look for situations like this:
   // %reg1024<def> = MOV r1
   // %reg1025<def> = MOV r0
   // %reg1026<def> = ADD %reg1024, %reg1025
   // r2            = MOV %reg1026
   // Turn ADD into a 3-address instruction to avoid a copy.
-  unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+  unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+  if (!FromRegB)
+    return false;
   unsigned ToRegA = getMappedReg(RegA, DstRegMap);
-  return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+  return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
 }
 
 /// ConvertInstTo3Addr - Convert the specified two-address instruction into a
@@ -664,6 +670,54 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
   return false;
 }
 
+/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+                                    SmallPtrSet<MachineInstr*, 8> &Processed) {
+  SmallVector<unsigned, 4> VirtRegPairs;
+  bool IsDstPhys;
+  bool IsCopy = false;
+  unsigned NewReg = 0;
+  unsigned Reg = DstReg;
+  while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+                                                      NewReg, IsDstPhys)) {
+    if (IsCopy && !Processed.insert(UseMI))
+      break;
+
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+    if (DI != DistanceMap.end())
+      // Earlier in the same MBB.Reached via a back edge.
+      break;
+
+    if (IsDstPhys) {
+      VirtRegPairs.push_back(NewReg);
+      break;
+    }
+    bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+    if (!isNew)
+      assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+    VirtRegPairs.push_back(NewReg);
+    Reg = NewReg;
+  }
+
+  if (!VirtRegPairs.empty()) {
+    unsigned ToReg = VirtRegPairs.back();
+    VirtRegPairs.pop_back();
+    while (!VirtRegPairs.empty()) {
+      unsigned FromReg = VirtRegPairs.back();
+      VirtRegPairs.pop_back();
+      bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+      if (!isNew)
+        assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+      ToReg = FromReg;
+    }
+    bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+    if (!isNew)
+      assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+  }
+}
+
 /// ProcessCopy - If the specified instruction is not yet processed, process it
 /// if it's a copy. For a copy instruction, we find the physical registers the
 /// source and destination registers might be mapped to. These are kept in
@@ -695,49 +749,11 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
       assert(SrcRegMap[DstReg] == SrcReg &&
              "Can't map to two src physical registers!");
 
-    SmallVector<unsigned, 4> VirtRegPairs;
-    bool IsCopy = false;
-    unsigned NewReg = 0;
-    while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
-                                                   IsCopy, NewReg, IsDstPhys)) {
-      if (IsCopy) {
-        if (!Processed.insert(UseMI))
-          break;
-      }
-
-      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
-      if (DI != DistanceMap.end())
-        // Earlier in the same MBB.Reached via a back edge.
-        break;
-
-      if (IsDstPhys) {
-        VirtRegPairs.push_back(NewReg);
-        break;
-      }
-      bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
-      if (!isNew)
-        assert(SrcRegMap[NewReg] == DstReg &&
-               "Can't map to two src physical registers!");
-      VirtRegPairs.push_back(NewReg);
-      DstReg = NewReg;
-    }
-
-    if (!VirtRegPairs.empty()) {
-      unsigned ToReg = VirtRegPairs.back();
-      VirtRegPairs.pop_back();
-      while (!VirtRegPairs.empty()) {
-        unsigned FromReg = VirtRegPairs.back();
-        VirtRegPairs.pop_back();
-        bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
-        if (!isNew)
-          assert(DstRegMap[FromReg] == ToReg &&
-                 "Can't map to two dst physical registers!");
-        ToReg = FromReg;
-      }
-    }
+    ScanUses(DstReg, MBB, Processed);
   }
 
   Processed.insert(MI);
+  return;
 }
 
 /// isSafeToDelete - If the specified instruction does not produce any side
@@ -836,7 +852,8 @@ bool TwoAddressInstructionPass::
 TryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineBasicBlock::iterator &nmi,
                         MachineFunction::iterator &mbbi,
-                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+                        SmallPtrSet<MachineInstr*, 8> &Processed) {
   const TargetInstrDesc &TID = mi->getDesc();
   unsigned regA = mi->getOperand(DstIdx).getReg();
   unsigned regB = mi->getOperand(SrcIdx).getReg();
@@ -887,10 +904,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
     return false;
   }
 
+  if (TargetRegisterInfo::isVirtualRegister(regA))
+    ScanUses(regA, &*mbbi, Processed);
+
   if (TID.isConvertibleTo3Addr()) {
     // This instruction is potentially convertible to a true
     // three-address instruction.  Check if it is profitable.
-    if (!regBKilled || isProfitableToConv3Addr(regA)) {
+    if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
       // Try to convert it.
       if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
         ++NumConvertedTo3Addr;
@@ -951,7 +971,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
         MachineBasicBlock::iterator NewMI = NewMIs[1];
         bool TransformSuccess =
           TryInstructionTransform(NewMI, mi, mbbi,
-                                  NewSrcIdx, NewDstIdx, Dist);
+                                  NewSrcIdx, NewDstIdx, Dist, Processed);
         if (TransformSuccess ||
             NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
           // Success, or at least we made an improvement. Keep the unfolded
@@ -1100,7 +1120,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
               mi->getOperand(DstIdx).getReg())
             break; // Done with this instruction.
 
-          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist))
+          if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+                                      Processed))
             break; // The tied operands have been eliminated.
         }
 
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 734b87e..226b78f 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -259,7 +259,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
   DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
                << "********** Function: "
                << MF->getFunction()->getName() << '\n');
-
+  DEBUG(dump());
+  SmallVector<unsigned, 8> SuperDeads;
+  SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
 
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
@@ -283,12 +285,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
         if (MO.getSubReg()) {
           // A virtual register kill refers to the whole register, so we may
           // have to add <imp-use,kill> operands for the super-register.
-          if (MO.isUse() && MO.isKill() && !MO.isUndef())
-            SuperKills.push_back(PhysReg);
-
-          // We don't have to deal with sub-register defs because
-          // LiveIntervalAnalysis already added the necessary <imp-def>
-          // operands.
+          if (MO.isUse()) {
+            if (MO.isKill() && !MO.isUndef())
+              SuperKills.push_back(PhysReg);
+          } else if (MO.isDead())
+            SuperDeads.push_back(PhysReg);
+          else
+            SuperDefs.push_back(PhysReg);
 
           // PhysReg operands cannot have subregister indexes.
           PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
@@ -305,16 +308,28 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
       while (!SuperKills.empty())
         MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
 
+      while (!SuperDeads.empty())
+        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+      while (!SuperDefs.empty())
+        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
       DEBUG(dbgs() << "> " << *MI);
 
       // Finally, remove any identity copies.
       if (MI->isIdentityCopy()) {
-        DEBUG(dbgs() << "Deleting identity copy.\n");
-        RemoveMachineInstrFromMaps(MI);
-        if (Indexes)
-          Indexes->removeMachineInstrFromMaps(MI);
-        // It's safe to erase MI because MII has already been incremented.
-        MI->eraseFromParent();
+        if (MI->getNumOperands() == 2) {
+          DEBUG(dbgs() << "Deleting identity copy.\n");
+          RemoveMachineInstrFromMaps(MI);
+          if (Indexes)
+            Indexes->removeMachineInstrFromMaps(MI);
+          // It's safe to erase MI because MII has already been incremented.
+          MI->eraseFromParent();
+        } else {
+          // Transform identity copy to a KILL to deal with subregisters.
+          MI->setDesc(TII->get(TargetOpcode::KILL));
+          DEBUG(dbgs() << "Identity copy: " << *MI);
+        }
       }
     }
   }
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index ec149dd..1850658 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted");
 STATISTIC(NumDRM     , "Number of re-materializable defs elided");
 STATISTIC(NumStores  , "Number of stores added");
 STATISTIC(NumPSpills , "Number of physical register spills");
-STATISTIC(NumOmitted , "Number of reloads omited");
+STATISTIC(NumOmitted , "Number of reloads omitted");
 STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
 STATISTIC(NumCopified, "Number of available reloads turned into copies");
 STATISTIC(NumReMats  , "Number of re-materialization");
@@ -261,6 +261,10 @@ public:
   /// now).
   void ModifyStackSlotOrReMat(int SlotOrReMat);
 
+  /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
+  /// other stack slots sharing the same register are no longer valid.
+  void ClobberSharingStackSlots(int StackSlot);
+
   /// AddAvailableRegsToLiveIn - Availability information is being kept coming
   /// into the specified MBB. Add available physical registers as potential
   /// live-in's. If they are reused in the MBB, they will be added to the
@@ -665,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
   }
 }
 
-/// ReMaterialize - Re-materialize definition for Reg targetting DestReg.
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
 ///
 static void ReMaterialize(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MII,
@@ -831,6 +835,26 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
   PhysRegsAvailable.erase(I);
 }
 
+void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
+  std::map<int, unsigned>::iterator It =
+    SpillSlotsOrReMatsAvailable.find(StackSlot);
+  if (It == SpillSlotsOrReMatsAvailable.end()) return;
+  unsigned Reg = It->second >> 1;
+
+  // Erase entries in PhysRegsAvailable for other stack slots.
+  std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+  while (I != PhysRegsAvailable.end() && I->first == Reg) {
+    std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
+    if (I->second != StackSlot) {
+      DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
+                   << PrintReg(Reg, TRI) << '\n');
+      SpillSlotsOrReMatsAvailable.erase(I->second);
+      PhysRegsAvailable.erase(I);
+    }
+    I = NextI;
+  }
+}
+
 // ************************** //
 // Reuse Info Implementation  //
 // ************************** //
@@ -1791,8 +1815,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
       else
         DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
       DEBUG(dbgs() << " from physreg "
-                   << TRI->getName(InReg) << " for vreg"
-                   << VirtReg <<" instead of reloading into physreg "
+                   << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+                   <<" instead of reloading into physreg "
                    << TRI->getName(Phys) << '\n');
 
       // Reusing a physreg may resurrect it. But we expect ProcessUses to update
@@ -1807,8 +1831,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
       else
         DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
       DEBUG(dbgs() << " from physreg "
-                   << TRI->getName(InReg) << " for vreg"
-                   << VirtReg <<" by copying it into physreg "
+                   << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
+                   <<" by copying it into physreg "
                    << TRI->getName(Phys) << '\n');
 
       // If the reloaded / remat value is available in another register,
@@ -2025,7 +2049,8 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
               TRI->regsOverlap(MOk.getReg(), PhysReg)) {
             CanReuse = false;
             DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
-                         << " for vreg" << VirtReg << ": " << MOk << '\n');
+                         << " for " << PrintReg(VirtReg) << ": " << MOk
+                         << '\n');
             break;
           }
         }
@@ -2039,9 +2064,9 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
         else
           DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
         DEBUG(dbgs() << " from physreg "
-              << TRI->getName(PhysReg) << " for vreg"
-              << VirtReg <<" instead of reloading into physreg "
-              << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+              << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
+              << " instead of reloading into "
+              << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
         unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
         MI.getOperand(i).setReg(RReg);
         MI.getOperand(i).setSubReg(0);
@@ -2126,7 +2151,7 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
         else
           DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
         DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
-              << " for vreg" << VirtReg
+              << " for " << PrintReg(VirtReg)
               << " instead of reloading into same physreg.\n");
         unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
         MI.getOperand(i).setReg(RReg);
@@ -2315,7 +2340,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
     for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
       unsigned VirtReg = FoldedVirts[FVI].first;
       VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
-      DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
+      DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << "  MR: " << MR);
 
       int SS = VRM->getStackSlot(VirtReg);
       if (SS == VirtRegMap::NO_STACK_SLOT)
@@ -2549,6 +2574,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
         }
       }
 
+      // If StackSlot is available in a register that also holds other stack
+      // slots, clobber those stack slots now.
+      Spills.ClobberSharingStackSlots(StackSlot);
+
       assert(PhysReg && "VR not assigned a physical register?");
       MRI->setPhysRegUsed(PhysReg);
       unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt
index 2248de0..a12b337 100644
--- a/lib/CompilerDriver/CMakeLists.txt
+++ b/lib/CompilerDriver/CMakeLists.txt
@@ -1,10 +1,12 @@
 set(LLVM_LINK_COMPONENTS support)
-set(LLVM_REQUIRES_EH 1)
 
-add_llvm_tool(llvmc
+# We don't want this library to appear in `llvm-config --libs` output,
+# so its name doesn't start with "LLVM".
+
+add_llvm_library(CompilerDriver
   Action.cpp
+  BuiltinOptions.cpp
   CompilationGraph.cpp
-  llvmc.cpp
-  Plugin.cpp
+  Main.cpp
   Tool.cpp
   )
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index b5632d2..8bff265 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -6,3 +6,4 @@ add_llvm_library(LLVMExecutionEngine
 add_subdirectory(Interpreter)
 add_subdirectory(JIT)
 add_subdirectory(MCJIT)
+add_subdirectory(RuntimeDyld)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index f286975..2b1e878 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -79,9 +79,10 @@ ExecutionEngine::~ExecutionEngine() {
 
 void ExecutionEngine::DeregisterAllTables() {
   if (ExceptionTableDeregister) {
-    for (std::vector<void*>::iterator it = AllExceptionTables.begin(),
-           ie = AllExceptionTables.end(); it != ie; ++it)
-      ExceptionTableDeregister(*it);
+    DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
+    DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
+    for (; it != ite; ++it)
+      ExceptionTableDeregister(it->second);
     AllExceptionTables.clear();
   }
 }
@@ -310,19 +311,19 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
   // it.
   if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
 
-  // Should be an array of '{ int, void ()* }' structs.  The first value is
+  // Should be an array of '{ i32, void ()* }' structs.  The first value is
   // the init priority, which we ignore.
-  ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (!InitList) return;
+  if (isa<ConstantAggregateZero>(GV->getInitializer()))
+    return;
+  ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-    ConstantStruct *CS =
-      dyn_cast<ConstantStruct>(InitList->getOperand(i));
-    if (!CS) continue;
-    if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+    if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
 
     Constant *FP = CS->getOperand(1);
     if (FP->isNullValue())
-      break;  // Found a null terminator, exit.
+      continue;  // Found a sentinal value, ignore.
 
     // Strip off constant expression casts.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
@@ -838,7 +839,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
   case Type::PointerTyID:
     // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
     if (StoreBytes != sizeof(PointerTy))
-      memset(Ptr, 0, StoreBytes);
+      memset(&(Ptr->PointerVal), 0, StoreBytes);
 
     *((PointerTy*)Ptr) = Val.PointerVal;
     break;
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index 169e1ba..fa8bee4 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -52,8 +52,8 @@ static void runAtExitHandlers() {
 #include <sys/stat.h>
 #endif
 #include <fcntl.h>
-/* stat functions are redirecting to __xstat with a version number.  On x86-64 
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' 
+/* stat functions are redirecting to __xstat with a version number.  On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
  * available as an exported symbol, so we have to add it explicitly.
  */
 namespace {
@@ -119,18 +119,18 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
     const char *NameStr = Name.c_str();
     // If this is an asm specifier, skip the sentinal.
     if (NameStr[0] == 1) ++NameStr;
-    
+
     // If it's an external function, look it up in the process image...
     void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
     if (Ptr) return Ptr;
-    
+
     // If it wasn't found and if it starts with an underscore ('_') character,
     // and has an asm specifier, try again without the underscore.
     if (Name[0] == 1 && NameStr[0] == '_') {
       Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
       if (Ptr) return Ptr;
     }
-    
+
     // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
     // are references to hidden visibility symbols that dlsym cannot resolve.
     // If we have one of these, strip off $LDBLStub and try again.
@@ -147,7 +147,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
     }
 #endif
   }
-  
+
   /// If a LazyFunctionCreator is installed, use it to get/create the function.
   if (LazyFunctionCreator)
     if (void *RP = LazyFunctionCreator(Name))
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index cc76b13..d1f87ac 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -35,7 +35,7 @@
 
 using namespace llvm;
 
-#ifdef __APPLE__ 
+#ifdef __APPLE__
 // Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
 // of atexit). It passes the address of linker generated symbol __dso_handle
 // to the function.
@@ -75,7 +75,7 @@ extern "C" void LLVMLinkInJIT() {
 #endif
 
 #if HAVE_EHTABLE_SUPPORT
- 
+
 // libgcc defines the __register_frame function to dynamically register new
 // dwarf frames for exception handling. This functionality is not portable
 // across compilers and is only provided by GCC. We use the __register_frame
@@ -113,10 +113,10 @@ struct LibgccObject {
   void *unused1;
   void *unused2;
   void *unused3;
-  
+
   /// frame - Pointer to the exception table.
   void *frame;
-  
+
   /// encoding -  The encoding of the object?
   union {
     struct {
@@ -124,15 +124,15 @@ struct LibgccObject {
       unsigned long from_array : 1;
       unsigned long mixed_encoding : 1;
       unsigned long encoding : 8;
-      unsigned long count : 21; 
+      unsigned long count : 21;
     } b;
     size_t i;
   } encoding;
-  
+
   /// fde_end - libgcc defines this field only if some macro is defined. We
   /// include this field even if it may not there, to make libgcc happy.
   char *fde_end;
-  
+
   /// next - At least we know it's a chained list!
   struct LibgccObject *next;
 };
@@ -153,7 +153,7 @@ struct LibgccObjectInfo {
   /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
   ///
   struct LibgccObject* unseenObjects;
-  
+
   unsigned unused[2];
 };
 
@@ -165,32 +165,32 @@ void DarwinRegisterFrame(void* FrameBegin) {
   LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
     _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
   assert(LOI && "This should be preallocated by the runtime");
-  
+
   // Allocate a new LibgccObject to represent this frame. Deallocation of this
   // object may be impossible: since darwin code in libgcc was written after
   // the ability to dynamically register frames, things may crash if we
   // deallocate it.
   struct LibgccObject* ob = (struct LibgccObject*)
     malloc(sizeof(struct LibgccObject));
-  
+
   // Do like libgcc for the values of the field.
   ob->unused1 = (void *)-1;
   ob->unused2 = 0;
   ob->unused3 = 0;
   ob->frame = FrameBegin;
-  ob->encoding.i = 0; 
+  ob->encoding.i = 0;
   ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
-  
+
   // Put the info on both places, as libgcc uses the first or the second
   // field. Note that we rely on having two pointers here. If fde_end was a
   // char, things would get complicated.
   ob->fde_end = (char*)LOI->unseenObjects;
   ob->next = LOI->unseenObjects;
-  
+
   // Update the key's unseenObjects list.
   LOI->unseenObjects = ob;
-  
-  // Finally update the "key". Apparently, libgcc requires it. 
+
+  // Finally update the "key". Apparently, libgcc requires it.
   _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
                                          LOI);
 
@@ -312,18 +312,18 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
   if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
     report_fatal_error("Target does not support machine code emission!");
   }
-  
+
   // Register routine for informing unwinding runtime about new EH frames
 #if HAVE_EHTABLE_SUPPORT
 #if USE_KEYMGR
   struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
     _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
-  
+
   // The key is created on demand, and libgcc creates it the first time an
   // exception occurs. Since we need the key to register frames, we create
   // it now.
   if (!LOI)
-    LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); 
+    LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
   _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
   InstallExceptionTableRegister(DarwinRegisterFrame);
   // Not sure about how to deregister on Darwin.
@@ -332,7 +332,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
   InstallExceptionTableDeregister(__deregister_frame);
 #endif // __APPLE__
 #endif // HAVE_EHTABLE_SUPPORT
-  
+
   // Initialize passes.
   PM.doInitialization();
 }
@@ -365,11 +365,11 @@ void JIT::addModule(Module *M) {
     if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
       report_fatal_error("Target does not support machine code emission!");
     }
-    
+
     // Initialize passes.
     PM.doInitialization();
   }
-  
+
   ExecutionEngine::addModule(M);
 }
 
@@ -377,29 +377,29 @@ void JIT::addModule(Module *M) {
 /// since the PassManager it contains references a released Module.
 bool JIT::removeModule(Module *M) {
   bool result = ExecutionEngine::removeModule(M);
-  
+
   MutexGuard locked(lock);
-  
+
   if (jitstate->getModule() == M) {
     delete jitstate;
     jitstate = 0;
   }
-  
+
   if (!jitstate && !Modules.empty()) {
     jitstate = new JITState(Modules[0]);
 
     FunctionPassManager &PM = jitstate->getPM(locked);
     PM.add(new TargetData(*TM.getTargetData()));
-    
+
     // Turn the machine code intermediate representation into bytes in memory
     // that may be executed.
     if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
       report_fatal_error("Target does not support machine code emission!");
     }
-    
+
     // Initialize passes.
     PM.doInitialization();
-  }    
+  }
   return result;
 }
 
@@ -433,7 +433,7 @@ GenericValue JIT::runFunction(Function *F,
 
         // Call the function.
         GenericValue rv;
-        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), 
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
                                  (char **)GVTOP(ArgValues[1]),
                                  (const char **)GVTOP(ArgValues[2])));
         return rv;
@@ -446,7 +446,7 @@ GenericValue JIT::runFunction(Function *F,
 
         // Call the function.
         GenericValue rv;
-        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), 
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
                                  (char **)GVTOP(ArgValues[1])));
         return rv;
       }
@@ -480,7 +480,7 @@ GenericValue JIT::runFunction(Function *F,
         rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
       else if (BitWidth <= 64)
         rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
-      else 
+      else
         llvm_unreachable("Integer types > 64 bits not supported");
       return rv;
     }
@@ -542,7 +542,7 @@ GenericValue JIT::runFunction(Function *F,
     case Type::PointerTyID:
       void *ArgPtr = GVTOP(AV);
       if (sizeof(void*) == 4)
-        C = ConstantInt::get(Type::getInt32Ty(F->getContext()), 
+        C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
                              (int)(intptr_t)ArgPtr);
       else
         C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
@@ -649,7 +649,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
            "Externally-defined function should not be in pending list.");
 
     jitTheFunction(PF, locked);
-    
+
     // Now that the function has been jitted, ask the JITEmitter to rewrite
     // the stub with real address of the function.
     updateFunctionStub(PF);
@@ -666,7 +666,7 @@ void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
 }
 
 /// getPointerToFunction - This method is used to get the address of the
-/// specified function, compiling it if neccesary.
+/// specified function, compiling it if necessary.
 ///
 void *JIT::getPointerToFunction(Function *F) {
 
@@ -703,7 +703,7 @@ void *JIT::getPointerToFunction(Function *F) {
 
 void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
   MutexGuard locked(lock);
-  
+
   BasicBlockAddressMapTy::iterator I =
     getBasicBlockAddressMap(locked).find(BB);
   if (I == getBasicBlockAddressMap(locked).end()) {
@@ -724,7 +724,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
 
   // resolve basic block address
   MutexGuard locked(lock);
-  
+
   BasicBlockAddressMapTy::iterator I =
     getBasicBlockAddressMap(locked).find(BB);
   if (I != getBasicBlockAddressMap(locked).end()) {
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 1d1763e..b576c16 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -42,7 +42,7 @@ public:
   FunctionPassManager &getPM(const MutexGuard &L) {
     return PM;
   }
-  
+
   Module *getModule() const { return M; }
   std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
     return PendingFunctions;
@@ -86,7 +86,7 @@ public:
   static void Register() {
     JITCtor = createJIT;
   }
-  
+
   /// getJITInfo - Return the target JIT information structure.
   ///
   TargetJITInfo &getJITInfo() const { return TJI; }
@@ -106,7 +106,7 @@ public:
   }
 
   virtual void addModule(Module *M);
-  
+
   /// removeModule - Remove a Module from the list of modules.  Returns true if
   /// M is found.
   virtual bool removeModule(Module *M);
@@ -146,7 +146,7 @@ public:
   /// getPointerToBasicBlock - This returns the address of the specified basic
   /// block, assuming function is compiled.
   void *getPointerToBasicBlock(BasicBlock *BB);
-  
+
   /// getOrEmitGlobalVariable - Return the address of the specified global
   /// variable, possibly emitting it to memory if needed.  This is used by the
   /// Emitter.
@@ -172,7 +172,7 @@ public:
   void freeMachineCodeForFunction(Function *F);
 
   /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
-  /// function was encountered.  Add it to a pending list to be processed after 
+  /// function was encountered.  Add it to a pending list to be processed after
   /// the current function.
   ///
   void addPendingFunction(Function *F);
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 3b5acb7..e71c20b 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -27,7 +27,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
 #include <string>
-#include <vector>
 
 namespace llvm {
 
@@ -143,7 +142,7 @@ void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
 
   // Add a mapping from F to the entry and buffer, so we can delete this
   // info later.
-  FnMap[F] = std::make_pair<std::string, jit_code_entry*>(Buffer, JITCodeEntry);
+  FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
 
   // Acquire the lock and do the registration.
   {
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index f54ccca..ddb0d54 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
 JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
 
 
-unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, 
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
                                                JITCodeEmitter& jce,
                                                unsigned char* StartFunction,
                                                unsigned char* EndFunction,
@@ -47,10 +47,10 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
   RI = TM.getRegisterInfo();
   TFI = TM.getFrameLowering();
   JCE = &jce;
-  
+
   unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
                                                      EndFunction);
-      
+
   unsigned char* Result = 0;
 
   const std::vector<const Function *> Personalities = MMI->getPersonalities();
@@ -63,7 +63,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
 }
 
 
-void 
+void
 JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
                                 const std::vector<MachineMove> &Moves) const {
   unsigned PointerSize = TD->getPointerSize();
@@ -74,26 +74,26 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
     const MachineMove &Move = Moves[i];
     MCSymbol *Label = Move.getLabel();
-    
+
     // Throw out move if the label is invalid.
     if (Label && (*JCE->getLabelLocations())[Label] == 0)
       continue;
-    
+
     intptr_t LabelPtr = 0;
     if (Label) LabelPtr = JCE->getLabelAddress(Label);
 
     const MachineLocation &Dst = Move.getDestination();
     const MachineLocation &Src = Move.getSource();
-    
+
     // Advance row if new location.
     if (BaseLabelPtr && Label && BaseLabel != Label) {
       JCE->emitByte(dwarf::DW_CFA_advance_loc4);
       JCE->emitInt32(LabelPtr - BaseLabelPtr);
-      
-      BaseLabel = Label; 
+
+      BaseLabel = Label;
       BaseLabelPtr = LabelPtr;
     }
-    
+
     // If advancing cfa.
     if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
       if (!Src.isReg()) {
@@ -103,7 +103,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
           JCE->emitByte(dwarf::DW_CFA_def_cfa);
           JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
         }
-        
+
         JCE->emitULEB128Bytes(-Src.getOffset());
       } else {
         llvm_unreachable("Machine move not supported yet.");
@@ -119,7 +119,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
     } else {
       unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
       int Offset = Dst.getOffset() / stackGrowth;
-      
+
       if (Offset < 0) {
         JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
         JCE->emitULEB128Bytes(Reg);
@@ -382,7 +382,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
 
   unsigned TypeOffset = sizeof(int8_t) + // Call site format
                         // Call-site table length
-                        MCAsmInfo::getULEB128Size(SizeSites) + 
+                        MCAsmInfo::getULEB128Size(SizeSites) +
                         SizeSites + SizeActions + SizeTypes;
 
   // Begin the exception table.
@@ -452,7 +452,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
   // Emit the type ids.
   for (unsigned M = TypeInfos.size(); M; --M) {
     const GlobalVariable *GV = TypeInfos[M - 1];
-    
+
     if (GV) {
       if (TD->getPointerSize() == sizeof(int32_t))
         JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
@@ -484,7 +484,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   unsigned PointerSize = TD->getPointerSize();
   int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
           PointerSize : -PointerSize;
-  
+
   unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
   // EH Common Frame header
   JCE->allocateSpace(4, 0);
@@ -499,13 +499,13 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   if (Personality) {
     // Augmentation Size: 3 small ULEBs of one byte each, and the personality
     // function which size is PointerSize.
-    JCE->emitULEB128Bytes(3 + PointerSize); 
-    
+    JCE->emitULEB128Bytes(3 + PointerSize);
+
     // We set the encoding of the personality as direct encoding because we use
     // the function pointer. The encoding is not relative because the current
     // PC value may be bigger than the personality function pointer.
     if (PointerSize == 4) {
-      JCE->emitByte(dwarf::DW_EH_PE_sdata4); 
+      JCE->emitByte(dwarf::DW_EH_PE_sdata4);
       JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
     } else {
       JCE->emitByte(dwarf::DW_EH_PE_sdata8);
@@ -540,11 +540,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
 unsigned char*
 JITDwarfEmitter::EmitEHFrame(const Function* Personality,
                              unsigned char* StartCommonPtr,
-                             unsigned char* StartFunction, 
+                             unsigned char* StartFunction,
                              unsigned char* EndFunction,
                              unsigned char* ExceptionTable) const {
   unsigned PointerSize = TD->getPointerSize();
-  
+
   // EH frame header.
   unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
   JCE->allocateSpace(4, 0);
@@ -558,7 +558,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
   // specific data area in the exception table.
   if (Personality) {
     JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
-        
+
     if (PointerSize == 4) {
       if (!MMI->getLandingPads().empty())
         JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
@@ -573,7 +573,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
   } else {
     JCE->emitULEB128Bytes(0);
   }
-      
+
   // Indicate locations of function specific  callee saved registers in
   // frame.
   EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
@@ -593,6 +593,6 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
     JCE->emitInt32(0);
     JCE->emitInt32(0);
   }
-  
+
   return StartEHPtr;
 }
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 9495697..e1d0045 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -35,33 +35,33 @@ class JITDwarfEmitter {
   MachineModuleInfo* MMI;
   JIT& Jit;
   bool stackGrowthDirection;
-  
+
   unsigned char* EmitExceptionTable(MachineFunction* MF,
-                                    unsigned char* StartFunction, 
+                                    unsigned char* StartFunction,
                                     unsigned char* EndFunction) const;
 
-  void EmitFrameMoves(intptr_t BaseLabelPtr, 
+  void EmitFrameMoves(intptr_t BaseLabelPtr,
                       const std::vector<MachineMove> &Moves) const;
-    
+
   unsigned char* EmitCommonEHFrame(const Function* Personality) const;
 
-  unsigned char* EmitEHFrame(const Function* Personality, 
+  unsigned char* EmitEHFrame(const Function* Personality,
                              unsigned char* StartBufferPtr,
-                             unsigned char* StartFunction, 
+                             unsigned char* StartFunction,
                              unsigned char* EndFunction,
                              unsigned char* ExceptionTable) const;
-    
+
 public:
-  
+
   JITDwarfEmitter(JIT& jit);
-  
-  unsigned char* EmitDwarfTable(MachineFunction& F, 
+
+  unsigned char* EmitDwarfTable(MachineFunction& F,
                                 JITCodeEmitter& JCE,
                                 unsigned char* StartFunction,
                                 unsigned char* EndFunction,
                                 unsigned char* &EHFramePtr);
-  
-  
+
+
   void setModuleInfo(MachineModuleInfo* Info) {
     MMI = Info;
   }
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 4cd8757..d046b8a 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -123,17 +123,18 @@ namespace {
       return FunctionToLazyStubMap;
     }
 
-    GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) {
-      assert(locked.holds(TheJIT->lock));
+    GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) {
+      assert(lck.holds(TheJIT->lock));
       return GlobalToIndirectSymMap;
     }
 
-    pair<void *, Function *> LookupFunctionFromCallSite(
+    std::pair<void *, Function *> LookupFunctionFromCallSite(
         const MutexGuard &locked, void *CallSite) const {
       assert(locked.holds(TheJIT->lock));
 
-      // The address given to us for the stub may not be exactly right, it might be
-      // a little bit after the stub.  As such, use upper_bound to find it.
+      // The address given to us for the stub may not be exactly right, it
+      // might be a little bit after the stub.  As such, use upper_bound to
+      // find it.
       CallSiteToFunctionMapTy::const_iterator I =
         CallSiteToFunctionMap.upper_bound(CallSite);
       assert(I != CallSiteToFunctionMap.begin() &&
@@ -645,7 +646,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
 
     // The address given to us for the stub may not be exactly right, it might
     // be a little bit after the stub.  As such, use upper_bound to find it.
-    pair<void*, Function*> I =
+    std::pair<void*, Function*> I =
       JR->state.LookupFunctionFromCallSite(locked, Stub);
     F = I.second;
     ActualPtr = I.first;
@@ -659,7 +660,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
 
     // If lazy compilation is disabled, emit a useful error message and abort.
     if (!JR->TheJIT->isCompilingLazily()) {
-      report_fatal_error("LLVM JIT requested to do lazy compilation of function '"
+      report_fatal_error("LLVM JIT requested to do lazy compilation of"
+                         " function '"
                         + F->getName() + "' when lazy compiles are disabled!");
     }
 
@@ -745,7 +747,7 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
 void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
   if (DL.isUnknown()) return;
   if (!BeforePrintingInsn) return;
-  
+
   const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
 
   if (DL.getScope(Context) != 0 && PrevDL != DL) {
@@ -781,7 +783,7 @@ void JITEmitter::startFunction(MachineFunction &F) {
   uintptr_t ActualSize = 0;
   // Set the memory writable, if it's not already
   MemMgr->setMemoryWritable();
-  
+
   if (SizeEstimate > 0) {
     // SizeEstimate will be non-zero on reallocation attempts.
     ActualSize = SizeEstimate;
@@ -859,7 +861,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
         } else if (MR.isBasicBlock()) {
           ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
         } else if (MR.isConstantPoolIndex()) {
-          ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+          ResultPtr =
+            (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
         } else {
           assert(MR.isJumpTableIndex());
           ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
@@ -985,7 +988,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
     CurBufferPtr = SavedCurBufferPtr;
 
     if (JITExceptionHandling) {
-      TheJIT->RegisterTable(FrameRegister);
+      TheJIT->RegisterTable(F.getFunction(), FrameRegister);
     }
 
     if (JITEmitDebugInfo) {
@@ -1033,8 +1036,9 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
     EmittedFunctions.erase(Emitted);
   }
 
-  // TODO: Do we need to unregister exception handling information from libgcc
-  // here?
+  if(JITExceptionHandling) {
+    TheJIT->DeregisterTable(F);
+  }
 
   if (JITEmitDebugInfo) {
     DR->UnregisterFunction(F);
@@ -1129,7 +1133,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty() || JumpTableBase == 0) return;
 
-  
+
   switch (MJTI->getEntryKind()) {
   case MachineJumpTableInfo::EK_Inline:
     return;
@@ -1138,11 +1142,11 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
     //     .word LBB123
     assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
            "Cross JIT'ing?");
-    
+
     // For each jump table, map each target in the jump table to the address of
     // an emitted MachineBasicBlock.
     intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
-    
+
     for (unsigned i = 0, e = JT.size(); i != e; ++i) {
       const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
       // Store the address of the basic block for this jump table slot in the
@@ -1152,7 +1156,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
     }
     break;
   }
-      
+
   case MachineJumpTableInfo::EK_Custom32:
   case MachineJumpTableInfo::EK_GPRel32BlockAddress:
   case MachineJumpTableInfo::EK_LabelDifference32: {
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 670fa7d..9a9ed6d 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -108,8 +108,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
   if (op_write_native_code(Agent, F.getName().data(),
                            reinterpret_cast<uint64_t>(FnStart),
                            FnStart, FnSize) == -1) {
-    DEBUG(dbgs() << "Failed to tell OProfile about native function " 
-          << F.getName() << " at [" 
+    DEBUG(dbgs() << "Failed to tell OProfile about native function "
+          << F.getName() << " at ["
           << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
     return;
   }
@@ -153,9 +153,9 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
 
     if (op_write_debug_line_info(Agent, FnStart,
                                  LineInfo.size(), &*LineInfo.begin()) == -1) {
-      DEBUG(dbgs() 
+      DEBUG(dbgs()
             << "Failed to tell OProfile about line numbers for native function "
-            << F.getName() << " at [" 
+            << F.getName() << " at ["
             << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
     }
   }
diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp
index 6b7173c..8d92ab0 100644
--- a/lib/ExecutionEngine/JIT/TargetSelect.cpp
+++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -84,7 +84,7 @@ TargetMachine *JIT::selectTarget(Module *Mod,
   }
 
   // Allocate a target...
-  TargetMachine *Target = 
+  TargetMachine *Target =
     TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
   assert(Target && "Could not allocate target machine!");
   return Target;
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index f7ed176..6553079 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMMCJIT
   MCJIT.cpp
   TargetSelect.cpp
+  Intercept.cpp
   )
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
new file mode 100644
index 0000000..e431c84
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// If a function call occurs to an external function, the JIT is designed to use
+// the dynamic loader interface to find a function to call.  This is useful for
+// calling system calls and library functions that are not available in LLVM.
+// Some system calls, however, need to be handled specially.  For this reason,
+// we intercept some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+  while (!AtExitHandlers.empty()) {
+    void (*Fn)() = AtExitHandlers.back();
+    AtExitHandlers.pop_back();
+    Fn();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* stat functions are redirecting to __xstat with a version number.  On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+  StatSymbols() {
+    sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+    sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+    sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+    sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+    sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+    sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+    sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+    sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+    sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+  }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+  runAtExitHandlers();   // Run atexit handlers...
+  exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+  AtExitHandlers.push_back(Fn);    // Take note of atexit handler...
+  return 0;  // Always successful
+}
+
+static int jit_noop() {
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface.  As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+                                       bool AbortOnFailure) {
+  if (!isSymbolSearchingDisabled()) {
+    // Check to see if this is one of the functions we want to intercept.  Note,
+    // we cast to intptr_t here to silence a -pedantic warning that complains
+    // about casting a function pointer to a normal pointer.
+    if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+    if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+    // We should not invoke parent's ctors/dtors from generated main()!
+    // On Mingw and Cygwin, the symbol __main is resolved to
+    // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+    // (and register wrong callee's dtors with atexit(3)).
+    // We expect ExecutionEngine::runStaticConstructorsDestructors()
+    // is called before ExecutionEngine::runFunctionAsMain() is called.
+    if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+    const char *NameStr = Name.c_str();
+    // If this is an asm specifier, skip the sentinal.
+    if (NameStr[0] == 1) ++NameStr;
+
+    // If it's an external function, look it up in the process image...
+    void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+    if (Ptr) return Ptr;
+
+    // If it wasn't found and if it starts with an underscore ('_') character,
+    // and has an asm specifier, try again without the underscore.
+    if (Name[0] == 1 && NameStr[0] == '_') {
+      Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+      if (Ptr) return Ptr;
+    }
+
+    // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
+    // are references to hidden visibility symbols that dlsym cannot resolve.
+    // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+    if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+        memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+      // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+      // This mirrors logic in libSystemStubs.a.
+      std::string Prefix = std::string(Name.begin(), Name.end()-9);
+      if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+        return Ptr;
+      if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+        return Ptr;
+    }
+#endif
+  }
+
+  /// If a LazyFunctionCreator is installed, use it to get/create the function.
+  if (LazyFunctionCreator)
+    if (void *RP = LazyFunctionCreator(Name))
+      return RP;
+
+  if (AbortOnFailure) {
+    report_fatal_error("Program used external function '"+Name+
+                      "' which could not be resolved!");
+  }
+  return 0;
+}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index f1e9dab..3d4ee36 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -1,4 +1,4 @@
-//===-- JIT.cpp - MC-based Just-in-Time Compiler --------------------------===//
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,10 +8,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCJIT.h"
+#include "MCJITMemoryManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetData.h"
 
 using namespace llvm;
 
@@ -51,20 +58,47 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
 
   // If the target supports JIT code generation, create the JIT.
   if (TargetJITInfo *TJ = TM->getJITInfo())
-    return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+    return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), OptLevel,
+                     GVsWithCode);
 
   if (ErrorStr)
     *ErrorStr = "target does not support JIT code generation";
   return 0;
 }
 
-MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
-             JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
+             RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
              bool AllocateGVsWithCode)
-  : ExecutionEngine(M) {
+  : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
+
+  PM.add(new TargetData(*TM->getTargetData()));
+
+  // Turn the machine code intermediate representation into bytes in memory
+  // that may be executed.
+  if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) {
+    report_fatal_error("Target does not support MC emission!");
+  }
+
+  // Initialize passes.
+  // FIXME: When we support multiple modules, we'll want to move the code
+  // gen and finalization out of the constructor here and do it more
+  // on-demand as part of getPointerToFunction().
+  PM.run(*M);
+  // Flush the output buffer so the SmallVector gets its data.
+  OS.flush();
+
+  // Load the object into the dynamic linker.
+  // FIXME: It would be nice to avoid making yet another copy.
+  MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
+                                                              Buffer.size()));
+  if (Dyld.loadObject(MB))
+    report_fatal_error(Dyld.getErrorString());
+  // Resolve any relocations.
+  Dyld.resolveRelocations();
 }
 
 MCJIT::~MCJIT() {
+  delete MemMgr;
 }
 
 void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
@@ -73,8 +107,15 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
 }
 
 void *MCJIT::getPointerToFunction(Function *F) {
-  report_fatal_error("not yet implemented");
-  return 0;
+  if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+    bool AbortOnFailure = !F->hasExternalWeakLinkage();
+    void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+    addGlobalMapping(F, Addr);
+    return Addr;
+  }
+
+  Twine Name = TM->getMCAsmInfo()->getGlobalPrefix() + F->getName();
+  return (void*)Dyld.getSymbolAddress(Name.str());
 }
 
 void *MCJIT::recompileAndRelinkFunction(Function *F) {
@@ -87,6 +128,102 @@ void MCJIT::freeMachineCodeForFunction(Function *F) {
 
 GenericValue MCJIT::runFunction(Function *F,
                                 const std::vector<GenericValue> &ArgValues) {
-  report_fatal_error("not yet implemented");
+  assert(F && "Function *F was null at entry to run()");
+
+  void *FPtr = getPointerToFunction(F);
+  assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+  const FunctionType *FTy = F->getFunctionType();
+  const Type *RetTy = FTy->getReturnType();
+
+  assert((FTy->getNumParams() == ArgValues.size() ||
+          (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+         "Wrong number of arguments passed into function!");
+  assert(FTy->getNumParams() == ArgValues.size() &&
+         "This doesn't support passing arguments through varargs (yet)!");
+
+  // Handle some common cases first.  These cases correspond to common `main'
+  // prototypes.
+  if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+    switch (ArgValues.size()) {
+    case 3:
+      if (FTy->getParamType(0)->isIntegerTy(32) &&
+          FTy->getParamType(1)->isPointerTy() &&
+          FTy->getParamType(2)->isPointerTy()) {
+        int (*PF)(int, char **, const char **) =
+          (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+        // Call the function.
+        GenericValue rv;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+                                 (char **)GVTOP(ArgValues[1]),
+                                 (const char **)GVTOP(ArgValues[2])));
+        return rv;
+      }
+      break;
+    case 2:
+      if (FTy->getParamType(0)->isIntegerTy(32) &&
+          FTy->getParamType(1)->isPointerTy()) {
+        int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+        // Call the function.
+        GenericValue rv;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+                                 (char **)GVTOP(ArgValues[1])));
+        return rv;
+      }
+      break;
+    case 1:
+      if (FTy->getNumParams() == 1 &&
+          FTy->getParamType(0)->isIntegerTy(32)) {
+        GenericValue rv;
+        int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+        return rv;
+      }
+      break;
+    }
+  }
+
+  // Handle cases where no arguments are passed first.
+  if (ArgValues.empty()) {
+    GenericValue rv;
+    switch (RetTy->getTypeID()) {
+    default: llvm_unreachable("Unknown return type for function call!");
+    case Type::IntegerTyID: {
+      unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+      if (BitWidth == 1)
+        rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 8)
+        rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 16)
+        rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 32)
+        rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 64)
+        rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+      else
+        llvm_unreachable("Integer types > 64 bits not supported");
+      return rv;
+    }
+    case Type::VoidTyID:
+      rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+      return rv;
+    case Type::FloatTyID:
+      rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+      return rv;
+    case Type::DoubleTyID:
+      rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+      return rv;
+    case Type::X86_FP80TyID:
+    case Type::FP128TyID:
+    case Type::PPC_FP128TyID:
+      llvm_unreachable("long double not supported yet");
+      return rv;
+    case Type::PointerTyID:
+      return PTOGV(((void*(*)())(intptr_t)FPtr)());
+    }
+  }
+
+  assert("Full-featured argument passing not supported yet!");
   return GenericValue();
 }
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index cd1f989..1b50766 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,14 +10,37 @@
 #ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
 #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
 
+#include "llvm/PassManager.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
+// FIXME: This makes all kinds of horrible assumptions for the time being,
+// like only having one module, not needing to worry about multi-threading,
+// blah blah. Purely in get-it-up-and-limping mode for now.
+
 class MCJIT : public ExecutionEngine {
-  MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
-        JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+  MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
+        RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
         bool AllocateGVsWithCode);
+
+  TargetMachine *TM;
+  MCContext *Ctx;
+  RTDyldMemoryManager *MemMgr;
+
+  // FIXME: These may need moved to a separate 'jitstate' member like the
+  // non-MC JIT does for multithreading and such. Just keep them here for now.
+  PassManager PM;
+  Module *M;
+  // FIXME: This really doesn't belong here.
+  SmallVector<char, 4096> Buffer; // Working buffer into which we JIT.
+  raw_svector_ostream OS;
+
+  RuntimeDyld Dyld;
+
 public:
   ~MCJIT();
 
@@ -35,6 +58,16 @@ public:
   virtual GenericValue runFunction(Function *F,
                                    const std::vector<GenericValue> &ArgValues);
 
+  /// getPointerToNamedFunction - This method returns the address of the
+  /// specified function by using the dlsym function call.  As such it is only
+  /// useful for resolving library symbols, not code generated symbols.
+  ///
+  /// If AbortOnFailure is false and no function with the given name is
+  /// found, this function silently returns a null pointer. Otherwise,
+  /// it prints a message to stderr and aborts.
+  ///
+  void *getPointerToNamedFunction(const std::string &Name,
+                                  bool AbortOnFailure = true);
   /// @}
   /// @name (Private) Registration Interfaces
   /// @{
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
new file mode 100644
index 0000000..e3c6fda
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -0,0 +1,59 @@
+//===-- MCJITMemoryManager.h - Definition for the Memory Manager ---C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <assert.h>
+
+namespace llvm {
+
+// The MCJIT memory manager is a layer between the standard JITMemoryManager
+// and the RuntimeDyld interface that maps objects, by name, onto their
+// matching LLVM IR counterparts in the module(s) being compiled.
+class MCJITMemoryManager : public RTDyldMemoryManager {
+  JITMemoryManager *JMM;
+
+  // FIXME: Multiple modules.
+  Module *M;
+public:
+  MCJITMemoryManager(JITMemoryManager *jmm) : JMM(jmm) {}
+
+  // Allocate ActualSize bytes, or more, for the named function. Return
+  // a pointer to the allocated memory and update Size to reflect how much
+  // memory was acutally allocated.
+  uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
+    // FIXME: This should really reference the MCAsmInfo to get the global
+    //        prefix.
+    if (Name[0] == '_') ++Name;
+    Function *F = M->getFunction(Name);
+    assert(F && "No matching function in JIT IR Module!");
+    return JMM->startFunctionBody(F, Size);
+  }
+
+  // Mark the end of the function, including how much of the allocated
+  // memory was actually used.
+  void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+                       uint8_t *FunctionEnd) {
+    // FIXME: This should really reference the MCAsmInfo to get the global
+    //        prefix.
+    if (Name[0] == '_') ++Name;
+    Function *F = M->getFunction(Name);
+    assert(F && "No matching function in JIT IR Module!");
+    JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+  }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 1858d77..9a649a5 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,6 @@
 ##===----------------------------------------------------------------------===##
 LEVEL = ../..
 LIBRARYNAME = LLVMExecutionEngine
-PARALLEL_DIRS = Interpreter JIT MCJIT
+PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
new file mode 100644
index 0000000..9e53f87
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMRuntimeDyld
+  RuntimeDyld.cpp
+  )
diff --git a/lib/ExecutionEngine/RuntimeDyld/Makefile b/lib/ExecutionEngine/RuntimeDyld/Makefile
new file mode 100644
index 0000000..5d6f26d
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMRuntimeDyld
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 0000000..065e5e3
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,669 @@
+//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructor as the key function.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+
+namespace llvm {
+class RuntimeDyldImpl {
+  unsigned CPUType;
+  unsigned CPUSubtype;
+
+  // The MemoryManager to load objects into.
+  RTDyldMemoryManager *MemMgr;
+
+  // FIXME: This all assumes we're dealing with external symbols for anything
+  //        explicitly referenced. I.e., we can index by name and things
+  //        will work out. In practice, this may not be the case, so we
+  //        should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of it's instruction data.
+  StringMap<sys::MemoryBlock> Functions;
+
+  // Master symbol table. As modules are loaded and external symbols are
+  // resolved, their addresses are stored here.
+  StringMap<uint8_t*> SymbolTable;
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  struct RelocationEntry {
+    std::string Target;     // Object this relocation is contained in.
+    uint64_t    Offset;     // Offset into the object for the relocation.
+    uint32_t    Data;       // Second word of the raw macho relocation entry.
+    int64_t     Addend;     // Addend encoded in the instruction itself, if any.
+    bool        isResolved; // Has this relocation been resolved previously?
+
+    RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+      : Target(t), Offset(offset), Data(data), Addend(addend),
+        isResolved(false) {}
+  };
+  typedef SmallVector<RelocationEntry, 4> RelocationList;
+  StringMap<RelocationList> Relocations;
+
+  // FIXME: Also keep a map of all the relocations contained in an object. Use
+  // this to dynamically answer whether all of the relocations in it have
+  // been resolved or not.
+
+  bool HasError;
+  std::string ErrorStr;
+
+  // Set the error state and record an error string.
+  bool Error(const Twine &Msg) {
+    ErrorStr = Msg.str();
+    HasError = true;
+    return true;
+  }
+
+  void extractFunction(StringRef Name, uint8_t *StartAddress,
+                       uint8_t *EndAddress);
+  bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                         unsigned Type, unsigned Size);
+  bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                               unsigned Type, unsigned Size);
+  bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                            unsigned Type, unsigned Size);
+
+  bool loadSegment32(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+  bool loadSegment64(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+  RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+  bool loadObject(MemoryBuffer *InputBuffer);
+
+  void *getSymbolAddress(StringRef Name) {
+    // FIXME: Just look up as a function for now. Overly simple of course.
+    // Work in progress.
+    return SymbolTable.lookup(Name);
+  }
+
+  void resolveRelocations();
+
+  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+  // Is the linker in an error state?
+  bool hasError() { return HasError; }
+
+  // Mark the error condition as handled and continue.
+  void clearError() { HasError = false; }
+
+  // Get the error message.
+  StringRef getErrorString() { return ErrorStr; }
+};
+
+void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
+                                      uint8_t *EndAddress) {
+  // Allocate memory for the function via the memory manager.
+  uintptr_t Size = EndAddress - StartAddress + 1;
+  uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), Size);
+  assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
+         "Memory manager failed to allocate enough memory!");
+  // Copy the function payload into the memory block.
+  memcpy(Mem, StartAddress, EndAddress - StartAddress + 1);
+  MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
+  // Remember where we put it.
+  Functions[Name] = sys::MemoryBlock(Mem, Size);
+  // Default the assigned address for this symbol to wherever this
+  // allocated it.
+  SymbolTable[Name] = Mem;
+  DEBUG(dbgs() << "    allocated to " << Mem << "\n");
+}
+
+bool RuntimeDyldImpl::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                  unsigned Type, unsigned Size) {
+  // This just dispatches to the proper target specific routine.
+  switch (CPUType) {
+  default: assert(0 && "Unsupported CPU type!");
+  case mach::CTM_x86_64:
+    return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+                                   isPCRel, Type, Size);
+  case mach::CTM_ARM:
+    return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+                                isPCRel, Type, Size);
+  }
+  llvm_unreachable("");
+}
+
+bool RuntimeDyldImpl::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
+                        bool isPCRel, unsigned Type,
+                        unsigned Size) {
+  // If the relocation is PC-relative, the value to be encoded is the
+  // pointer difference.
+  if (isPCRel)
+    // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+    // address. Is that expected? Only for branches, perhaps?
+    Value -= Address + 4;
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  case macho::RIT_X86_64_Unsigned:
+  case macho::RIT_X86_64_Branch: {
+    // Mask in the target value a byte at a time (we don't have an alignment
+    // guarantee for the target address, so this is safest).
+    uint8_t *p = (uint8_t*)Address;
+    for (unsigned i = 0; i < Size; ++i) {
+      *p++ = (uint8_t)Value;
+      Value >>= 8;
+    }
+    return false;
+  }
+  case macho::RIT_X86_64_Signed:
+  case macho::RIT_X86_64_GOTLoad:
+  case macho::RIT_X86_64_GOT:
+  case macho::RIT_X86_64_Subtractor:
+  case macho::RIT_X86_64_Signed1:
+  case macho::RIT_X86_64_Signed2:
+  case macho::RIT_X86_64_Signed4:
+  case macho::RIT_X86_64_TLV:
+    return Error("Relocation type not implemented yet!");
+  }
+  return false;
+}
+
+bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+                                           bool isPCRel, unsigned Type,
+                                           unsigned Size) {
+  // If the relocation is PC-relative, the value to be encoded is the
+  // pointer difference.
+  if (isPCRel) {
+    Value -= Address;
+    // ARM PCRel relocations have an effective-PC offset of two instructions
+    // (four bytes in Thumb mode, 8 bytes in ARM mode).
+    // FIXME: For now, assume ARM mode.
+    Value -= 8;
+  }
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  case macho::RIT_Vanilla: {
+    llvm_unreachable("Invalid relocation type!");
+    // Mask in the target value a byte at a time (we don't have an alignment
+    // guarantee for the target address, so this is safest).
+    uint8_t *p = (uint8_t*)Address;
+    for (unsigned i = 0; i < Size; ++i) {
+      *p++ = (uint8_t)Value;
+      Value >>= 8;
+    }
+    break;
+  }
+  case macho::RIT_ARM_Branch24Bit: {
+    // Mask the value into the target address. We know instructions are
+    // 32-bit aligned, so we can do it all at once.
+    uint32_t *p = (uint32_t*)Address;
+    // The low two bits of the value are not encoded.
+    Value >>= 2;
+    // Mask the value to 24 bits.
+    Value &= 0xffffff;
+    // FIXME: If the destination is a Thumb function (and the instruction
+    // is a non-predicated BL instruction), we need to change it to a BLX
+    // instruction instead.
+
+    // Insert the value into the instruction.
+    *p = (*p & ~0xffffff) | Value;
+    break;
+  }
+  case macho::RIT_ARM_ThumbBranch22Bit:
+  case macho::RIT_ARM_ThumbBranch32Bit:
+  case macho::RIT_ARM_Half:
+  case macho::RIT_ARM_HalfDifference:
+  case macho::RIT_Pair:
+  case macho::RIT_Difference:
+  case macho::RIT_ARM_LocalDifference:
+  case macho::RIT_ARM_PreboundLazyPointer:
+    return Error("Relocation type not implemented yet!");
+  }
+  return false;
+}
+
+bool RuntimeDyldImpl::
+loadSegment32(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+  Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+  if (!SegmentLC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section> Sect;
+    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: Improve check.
+    if (Sect->Flags != 0x80000400)
+      return Error("unsupported section type!");
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::SymbolTableEntry> STE;
+      Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > SegmentLC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        return Error("unexpected symbol type!");
+      // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+      // doesn't require any special handling here.
+      if (STE->Flags != 0x0 && STE->Flags != 0x8)
+        return Error("unexpected symbol type!");
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+                                           SegmentLC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol we do after since the end address is calculated
+    // differently because there is no next symbol to reference.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the offset into the section where the
+      // relocation should be applied. We need to translate that into an
+      // offset into a function since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in section is almost always going
+      // to be very small (usually just one), so until we have use cases
+      // where that's not true, just use a trivial linear search.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Get the name of the symbol containing the relocation.
+      StringRef TargetName = SymbolNames[SymbolNum];
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is true,
+      // this relocation references the symbol table, otherwise it references
+      // a section in the same object, numbered from 1 through NumSections
+      // (SectionBases is [0, NumSections-1]).
+      // FIXME: Some targets (ARM) use internal relocations even for
+      // externally visible symbols, if the definition is in the same
+      // file as the reference. We need to convert those back to by-name
+      // references. We can resolve the address based on the section
+      // offset and see if we have a symbol at that address. If we do,
+      // use that; otherwise, puke.
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+
+bool RuntimeDyldImpl::
+loadSegment64(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+  Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+  if (!Segment64LC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section64> Sect;
+    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: Improve check.
+    if (Sect->Flags != 0x80000400)
+      return Error("unsupported section type!");
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::Symbol64TableEntry> STE;
+      Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > Segment64LC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        return Error("unexpected symbol type!");
+      if (STE->Flags != 0x0)
+        return Error("unexpected symbol type!");
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+                                           Segment64LC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol we do after since the end address is calculated
+    // differently because there is no next symbol to reference.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the offset into the section where the
+      // relocation should be applied. We need to translate that into an
+      // offset into a function since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in section is almost always going
+      // to be very small (usually just one), so until we have use cases
+      // where that's not true, just use a trivial linear search.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Get the name of the symbol containing the relocation.
+      StringRef TargetName = SymbolNames[SymbolNum];
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is true,
+      // this relocation references the symbol table, otherwise it references
+      // a section in the same object, numbered from 1 through NumSections
+      // (SectionBases is [0, NumSections-1]).
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
+  // If the linker is in an error state, don't do anything.
+  if (hasError())
+    return true;
+  // Load the Mach-O wrapper object.
+  std::string ErrorStr;
+  OwningPtr<MachOObject> Obj(
+    MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
+  if (!Obj)
+    return Error("unable to load object: '" + ErrorStr + "'");
+
+  // Get the CPU type information from the header.
+  const macho::Header &Header = Obj->getHeader();
+
+  // FIXME: Error checking that the loaded object is compatible with
+  //        the system we're running on.
+  CPUType = Header.CPUType;
+  CPUSubtype = Header.CPUSubtype;
+
+  // Validate that the load commands match what we expect.
+  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+    *DysymtabLCI = 0;
+  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+    switch (LCI.Command.Type) {
+    case macho::LCT_Segment:
+    case macho::LCT_Segment64:
+      if (SegmentLCI)
+        return Error("unexpected input object (multiple segments)");
+      SegmentLCI = &LCI;
+      break;
+    case macho::LCT_Symtab:
+      if (SymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      SymtabLCI = &LCI;
+      break;
+    case macho::LCT_Dysymtab:
+      if (DysymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      DysymtabLCI = &LCI;
+      break;
+    default:
+      return Error("unexpected input object (unexpected load command");
+    }
+  }
+
+  if (!SymtabLCI)
+    return Error("no symbol table found in object");
+  if (!SegmentLCI)
+    return Error("no symbol table found in object");
+
+  // Read and register the symbol table data.
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+  if (!SymtabLC)
+    return Error("unable to load symbol table load command");
+  Obj->RegisterStringTable(*SymtabLC);
+
+  // Read the dynamic link-edit information, if present (not present in static
+  // objects).
+  if (DysymtabLCI) {
+    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+    if (!DysymtabLC)
+      return Error("unable to load dynamic link-exit load command");
+
+    // FIXME: We don't support anything interesting yet.
+//    if (DysymtabLC->LocalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: local symbol entries");
+//    if (DysymtabLC->ExternalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+  }
+
+  // Load the segment load command.
+  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  } else {
+    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  }
+
+  return false;
+}
+
+// Resolve the relocations for all symbols we currently know about.
+void RuntimeDyldImpl::resolveRelocations() {
+  // Just iterate over the symbols in our symbol table and assign their
+  // addresses.
+  StringMap<uint8_t*>::iterator i = SymbolTable.begin();
+  StringMap<uint8_t*>::iterator e = SymbolTable.end();
+  for (;i != e; ++i)
+    reassignSymbolAddress(i->getKey(), i->getValue());
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  // Assign the address in our symbol table.
+  SymbolTable[Name] = Addr;
+
+  RelocationList &Relocs = Relocations[Name];
+  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+    RelocationEntry &RE = Relocs[i];
+    uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
+    bool isPCRel = (RE.Data >> 24) & 1;
+    unsigned Type = (RE.Data >> 28) & 0xf;
+    unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+    DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+          << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " from '" << Name << " (" << format("%p", Addr) << ")"
+          << "(" << (isPCRel ? "pcrel" : "absolute")
+          << ", type: " << Type << ", Size: " << Size << ").\n");
+
+    resolveRelocation(Target, Addr, isPCRel, Type, Size);
+    RE.isResolved = true;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// RuntimeDyld class implementation
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
+  Dyld = new RuntimeDyldImpl(MM);
+}
+
+RuntimeDyld::~RuntimeDyld() {
+  delete Dyld;
+}
+
+bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+  return Dyld->loadObject(InputBuffer);
+}
+
+void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+  return Dyld->getSymbolAddress(Name);
+}
+
+void RuntimeDyld::resolveRelocations() {
+  Dyld->resolveRelocations();
+}
+
+void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  Dyld->reassignSymbolAddress(Name, Addr);
+}
+
+StringRef RuntimeDyld::getErrorString() {
+  return Dyld->getErrorString();
+}
+
+} // end namespace llvm
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 5aa06ab..f372db2 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -505,6 +505,7 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
                            SGV->getType()->getAddressSpace());
       // Propagate alignment, visibility and section info.
       CopyGVAttributes(NewDGV, SGV);
+      NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr());
 
       // If the LLVM runtime renamed the global, but it is an externally visible
       // symbol, DGV must be an existing global with internal linkage.  Rename
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index f1811a1..6aed059 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMMC
   MCCodeEmitter.cpp
   MCContext.cpp
   MCDisassembler.cpp
+  MCELF.cpp
   MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
   MCExpr.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index ea1629d..23c6d4c 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,20 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "ELFObjectWriter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -38,39 +32,14 @@
 #include <vector>
 using namespace llvm;
 
-static unsigned GetType(const MCSymbolData &SD) {
-  uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
-  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
-         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
-         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
-         Type == ELF::STT_TLS);
-  return Type;
-}
-
-static unsigned GetBinding(const MCSymbolData &SD) {
-  uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
-  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
-         Binding == ELF::STB_WEAK);
-  return Binding;
-}
-
-static void SetBinding(MCSymbolData &SD, unsigned Binding) {
-  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
-         Binding == ELF::STB_WEAK);
-  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
-  SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
-}
+bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+  const MCFixupKindInfo &FKI =
+    Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
 
-static unsigned GetVisibility(MCSymbolData &SD) {
-  unsigned Visibility =
-    (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift;
-  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
-         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
-  return Visibility;
+  return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
 }
 
-
-static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
   switch (Variant) {
   default:
     return false;
@@ -90,345 +59,6 @@ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
   }
 }
 
-static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
-  const MCFixupKindInfo &FKI =
-    Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
-
-  return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
-}
-
-namespace {
-  class ELFObjectWriter : public MCObjectWriter {
-  protected:
-    /*static bool isFixupKindX86RIPRel(unsigned Kind) {
-      return Kind == X86::reloc_riprel_4byte ||
-        Kind == X86::reloc_riprel_4byte_movq_load;
-    }*/
-
-
-    /// ELFSymbolData - Helper struct for containing some precomputed information
-    /// on symbols.
-    struct ELFSymbolData {
-      MCSymbolData *SymbolData;
-      uint64_t StringIndex;
-      uint32_t SectionIndex;
-
-      // Support lexicographic sorting.
-      bool operator<(const ELFSymbolData &RHS) const {
-        if (GetType(*SymbolData) == ELF::STT_FILE)
-          return true;
-        if (GetType(*RHS.SymbolData) == ELF::STT_FILE)
-          return false;
-        return SymbolData->getSymbol().getName() <
-               RHS.SymbolData->getSymbol().getName();
-      }
-    };
-
-    /// @name Relocation Data
-    /// @{
-
-    struct ELFRelocationEntry {
-      // Make these big enough for both 32-bit and 64-bit
-      uint64_t r_offset;
-      int Index;
-      unsigned Type;
-      const MCSymbol *Symbol;
-      uint64_t r_addend;
-
-      ELFRelocationEntry()
-        : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
-
-      ELFRelocationEntry(uint64_t RelocOffset, int Idx,
-                         unsigned RelType, const MCSymbol *Sym,
-                         uint64_t Addend)
-        : r_offset(RelocOffset), Index(Idx), Type(RelType),
-          Symbol(Sym), r_addend(Addend) {}
-
-      // Support lexicographic sorting.
-      bool operator<(const ELFRelocationEntry &RE) const {
-        return RE.r_offset < r_offset;
-      }
-    };
-
-    /// The target specific ELF writer instance.
-    llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
-
-    SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
-    SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
-    DenseMap<const MCSymbol *, const MCSymbol *> Renames;
-
-    llvm::DenseMap<const MCSectionData*,
-                   std::vector<ELFRelocationEntry> > Relocations;
-    DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
-
-    /// @}
-    /// @name Symbol Table Data
-    /// @{
-
-    SmallString<256> StringTable;
-    std::vector<ELFSymbolData> LocalSymbolData;
-    std::vector<ELFSymbolData> ExternalSymbolData;
-    std::vector<ELFSymbolData> UndefinedSymbolData;
-
-    /// @}
-
-    bool NeedsGOT;
-
-    bool NeedsSymtabShndx;
-
-    // This holds the symbol table index of the last local symbol.
-    unsigned LastLocalSymbolIndex;
-    // This holds the .strtab section index.
-    unsigned StringTableIndex;
-    // This holds the .symtab section index.
-    unsigned SymbolTableIndex;
-
-    unsigned ShstrtabIndex;
-
-
-    const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
-                                  const MCValue &Target,
-                                  const MCFragment &F) const;
-
-    // For arch-specific emission of explicit reloc symbol
-    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
-                                           const MCValue &Target,
-                                           const MCFragment &F,
-                                           bool IsBSS) const {
-      return NULL;
-    }
-
-    bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
-    bool hasRelocationAddend() const {
-      return TargetObjectWriter->hasRelocationAddend();
-    }
-
-  public:
-    ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
-                    raw_ostream &_OS, bool IsLittleEndian)
-      : MCObjectWriter(_OS, IsLittleEndian),
-        TargetObjectWriter(MOTW),
-        NeedsGOT(false), NeedsSymtabShndx(false){
-    }
-
-    virtual ~ELFObjectWriter();
-
-    void WriteWord(uint64_t W) {
-      if (is64Bit())
-        Write64(W);
-      else
-        Write32(W);
-    }
-
-    void StringLE16(char *buf, uint16_t Value) {
-      buf[0] = char(Value >> 0);
-      buf[1] = char(Value >> 8);
-    }
-
-    void StringLE32(char *buf, uint32_t Value) {
-      StringLE16(buf, uint16_t(Value >> 0));
-      StringLE16(buf + 2, uint16_t(Value >> 16));
-    }
-
-    void StringLE64(char *buf, uint64_t Value) {
-      StringLE32(buf, uint32_t(Value >> 0));
-      StringLE32(buf + 4, uint32_t(Value >> 32));
-    }
-
-    void StringBE16(char *buf ,uint16_t Value) {
-      buf[0] = char(Value >> 8);
-      buf[1] = char(Value >> 0);
-    }
-
-    void StringBE32(char *buf, uint32_t Value) {
-      StringBE16(buf, uint16_t(Value >> 16));
-      StringBE16(buf + 2, uint16_t(Value >> 0));
-    }
-
-    void StringBE64(char *buf, uint64_t Value) {
-      StringBE32(buf, uint32_t(Value >> 32));
-      StringBE32(buf + 4, uint32_t(Value >> 0));
-    }
-
-    void String8(MCDataFragment &F, uint8_t Value) {
-      char buf[1];
-      buf[0] = Value;
-      F.getContents() += StringRef(buf, 1);
-    }
-
-    void String16(MCDataFragment &F, uint16_t Value) {
-      char buf[2];
-      if (isLittleEndian())
-        StringLE16(buf, Value);
-      else
-        StringBE16(buf, Value);
-      F.getContents() += StringRef(buf, 2);
-    }
-
-    void String32(MCDataFragment &F, uint32_t Value) {
-      char buf[4];
-      if (isLittleEndian())
-        StringLE32(buf, Value);
-      else
-        StringBE32(buf, Value);
-      F.getContents() += StringRef(buf, 4);
-    }
-
-    void String64(MCDataFragment &F, uint64_t Value) {
-      char buf[8];
-      if (isLittleEndian())
-        StringLE64(buf, Value);
-      else
-        StringBE64(buf, Value);
-      F.getContents() += StringRef(buf, 8);
-    }
-
-    virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
-
-    /// Default e_flags = 0
-    virtual void WriteEFlags() { Write32(0); }
-
-    virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
-                          uint64_t name, uint8_t info,
-                          uint64_t value, uint64_t size,
-                          uint8_t other, uint32_t shndx,
-                          bool Reserved);
-
-    virtual void WriteSymbol(MCDataFragment *SymtabF,  MCDataFragment *ShndxF,
-                     ELFSymbolData &MSD,
-                     const MCAsmLayout &Layout);
-
-    typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
-    virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
-                          const MCAssembler &Asm,
-                          const MCAsmLayout &Layout,
-                          const SectionIndexMapTy &SectionIndexMap);
-
-    virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                                  const MCFragment *Fragment, const MCFixup &Fixup,
-                                  MCValue Target, uint64_t &FixedValue);
-
-    virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
-                                         const MCSymbol *S);
-
-    // Map from a group section to the signature symbol
-    typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
-    // Map from a signature symbol to the group section
-    typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
-
-    /// ComputeSymbolTable - Compute the symbol table data
-    ///
-    /// \param StringTable [out] - The string table data.
-    /// \param StringIndexMap [out] - Map from symbol names to offsets in the
-    /// string table.
-    virtual void ComputeSymbolTable(MCAssembler &Asm,
-                            const SectionIndexMapTy &SectionIndexMap,
-                            RevGroupMapTy RevGroupMap);
-
-    virtual void ComputeIndexMap(MCAssembler &Asm,
-                         SectionIndexMapTy &SectionIndexMap);
-
-    virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
-                         const MCSectionData &SD);
-
-    virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
-      for (MCAssembler::const_iterator it = Asm.begin(),
-             ie = Asm.end(); it != ie; ++it) {
-        WriteRelocation(Asm, Layout, *it);
-      }
-    }
-
-    virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
-                                const SectionIndexMapTy &SectionIndexMap);
-
-    // Create the sections that show up in the symbol table. Currently
-    // those are the .note.GNU-stack section and the group sections.
-    virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
-                                       GroupMapTy &GroupMap,
-                                       RevGroupMapTy &RevGroupMap);
-
-    virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
-                                          const MCAsmLayout &Layout);
-
-    virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
-                          uint64_t Address, uint64_t Offset,
-                          uint64_t Size, uint32_t Link, uint32_t Info,
-                          uint64_t Alignment, uint64_t EntrySize);
-
-    virtual void WriteRelocationsFragment(const MCAssembler &Asm,
-                                          MCDataFragment *F,
-                                          const MCSectionData *SD);
-
-    virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
-    virtual void WriteSection(MCAssembler &Asm,
-                      const SectionIndexMapTy &SectionIndexMap,
-                      uint32_t GroupSymbolIndex,
-                      uint64_t Offset, uint64_t Size, uint64_t Alignment,
-                      const MCSectionELF &Section);
-
-  protected:
-    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                                  bool IsPCRel, bool IsRelocWithSymbol,
-                                  int64_t Addend) = 0;
-  };
-
-  //===- X86ELFObjectWriter -------------------------------------------===//
-
-  class X86ELFObjectWriter : public ELFObjectWriter {
-  public:
-    X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
-                       raw_ostream &_OS,
-                       bool IsLittleEndian);
-
-    virtual ~X86ELFObjectWriter();
-  protected:
-    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                                  bool IsPCRel, bool IsRelocWithSymbol,
-                                  int64_t Addend);
-  };
-
-
-  //===- ARMELFObjectWriter -------------------------------------------===//
-
-  class ARMELFObjectWriter : public ELFObjectWriter {
-  public:
-    // FIXME: MCAssembler can't yet return the Subtarget,
-    enum { DefaultEABIVersion = 0x05000000U };
-
-    ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
-                       raw_ostream &_OS,
-                       bool IsLittleEndian);
-
-    virtual ~ARMELFObjectWriter();
-
-    virtual void WriteEFlags();
-  protected:
-    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
-                                           const MCValue &Target,
-                                           const MCFragment &F,
-                                           bool IsBSS) const;
-
-    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                                  bool IsPCRel, bool IsRelocWithSymbol,
-                                  int64_t Addend);
-  };
-
-  //===- MBlazeELFObjectWriter -------------------------------------------===//
-
-  class MBlazeELFObjectWriter : public ELFObjectWriter {
-  public:
-    MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
-                          raw_ostream &_OS,
-                          bool IsLittleEndian);
-
-    virtual ~MBlazeELFObjectWriter();
-  protected:
-    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                                  bool IsPCRel, bool IsRelocWithSymbol,
-                                  int64_t Addend);
-  };
-}
-
 ELFObjectWriter::~ELFObjectWriter()
 {}
 
@@ -533,7 +163,8 @@ void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
   }
 }
 
-static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) {
+uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
+                                      const MCAsmLayout &Layout) {
   if (Data.isCommon() && Data.isExternal())
     return Data.getCommonAlignment();
 
@@ -579,7 +210,7 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
     // Aliases defined with .symvar copy the binding from the symbol they alias.
     // This is the first place we are able to copy this information.
     it->setExternal(SD.isExternal());
-    SetBinding(*it, GetBinding(SD));
+    MCELF::SetBinding(*it, MCELF::GetBinding(SD));
 
     StringRef Rest = AliasName.substr(Pos);
     if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
@@ -605,9 +236,9 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
   bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
     Data.getSymbol().isVariable();
 
-  uint8_t Binding = GetBinding(OrigData);
-  uint8_t Visibility = GetVisibility(OrigData);
-  uint8_t Type = GetType(Data);
+  uint8_t Binding = MCELF::GetBinding(OrigData);
+  uint8_t Visibility = MCELF::GetVisibility(OrigData);
+  uint8_t Type = MCELF::GetType(Data);
 
   uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
   uint8_t Other = Visibility;
@@ -673,7 +304,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
             (Data.getFlags() & ELF_STB_Weak)) &&
            "External symbol requires STB_GLOBAL or STB_WEAK flag");
     WriteSymbol(SymtabF, ShndxF, MSD, Layout);
-    if (GetBinding(Data) == ELF::STB_LOCAL)
+    if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
       LastLocalSymbolIndex++;
   }
 
@@ -681,7 +312,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
     ELFSymbolData &MSD = UndefinedSymbolData[i];
     MCSymbolData &Data = *MSD.SymbolData;
     WriteSymbol(SymtabF, ShndxF, MSD, Layout);
-    if (GetBinding(Data) == ELF::STB_LOCAL)
+    if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
       LastLocalSymbolIndex++;
   }
 }
@@ -798,7 +429,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   FixedValue = Value;
   unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
                                (RelocSymbol != 0), Addend);
-  
+
   uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
     Fixup.getOffset();
 
@@ -816,8 +447,9 @@ ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
   return SD.getIndex();
 }
 
-static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
-                       bool Used, bool Renamed) {
+bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
+                                 const MCSymbolData &Data,
+                                 bool Used, bool Renamed) {
   if (Data.getFlags() & ELF_Other_Weakref)
     return false;
 
@@ -836,7 +468,7 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
   if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
     return false;
 
-  bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL;
+  bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
   if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
     return false;
 
@@ -849,8 +481,8 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
   return true;
 }
 
-static bool isLocal(const MCSymbolData &Data, bool isSignature,
-                    bool isUsedInReloc) {
+bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature,
+                              bool isUsedInReloc) {
   if (Data.isExternal())
     return false;
 
@@ -868,7 +500,8 @@ static bool isLocal(const MCSymbolData &Data, bool isSignature,
 }
 
 void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
-                                      SectionIndexMapTy &SectionIndexMap) {
+                                      SectionIndexMapTy &SectionIndexMap,
+                                      const RelMapTy &RelMap) {
   unsigned Index = 1;
   for (MCAssembler::iterator it = Asm.begin(),
          ie = Asm.end(); it != ie; ++it) {
@@ -883,31 +516,37 @@ void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
          ie = Asm.end(); it != ie; ++it) {
     const MCSectionELF &Section =
       static_cast<const MCSectionELF &>(it->getSection());
-    if (Section.getType() == ELF::SHT_GROUP)
+    if (Section.getType() == ELF::SHT_GROUP ||
+        Section.getType() == ELF::SHT_REL ||
+        Section.getType() == ELF::SHT_RELA)
       continue;
     SectionIndexMap[&Section] = Index++;
+    const MCSectionELF *RelSection = RelMap.lookup(&Section);
+    if (RelSection)
+      SectionIndexMap[RelSection] = Index++;
   }
 }
 
 void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
                                       const SectionIndexMapTy &SectionIndexMap,
-                                      RevGroupMapTy RevGroupMap) {
+                                         RevGroupMapTy RevGroupMap,
+                                         unsigned NumRegularSections) {
   // FIXME: Is this the correct place to do this?
   if (NeedsGOT) {
     llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
     MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
     MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
     Data.setExternal(true);
-    SetBinding(Data, ELF::STB_GLOBAL);
+    MCELF::SetBinding(Data, ELF::STB_GLOBAL);
   }
 
-  // Build section lookup table.
-  int NumRegularSections = Asm.size();
-
   // Index 0 is always the empty string.
   StringMap<uint64_t> StringIndexMap;
   StringTable += '\x00';
 
+  // FIXME: We could optimize suffixes in strtab in the same way we
+  // optimize them in shstrtab.
+
   // Add the data for the symbols.
   for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
          ie = Asm.symbol_end(); it != ie; ++it) {
@@ -929,14 +568,14 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
     // Undefined symbols are global, but this is the first place we
     // are able to set it.
     bool Local = isLocal(*it, isSignature, Used);
-    if (!Local && GetBinding(*it) == ELF::STB_LOCAL) {
+    if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) {
       MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
-      SetBinding(*it, ELF::STB_GLOBAL);
-      SetBinding(SD, ELF::STB_GLOBAL);
+      MCELF::SetBinding(*it, ELF::STB_GLOBAL);
+      MCELF::SetBinding(SD, ELF::STB_GLOBAL);
     }
 
     if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
-      SetBinding(*it, ELF::STB_WEAK);
+      MCELF::SetBinding(*it, ELF::STB_WEAK);
 
     if (it->isCommon()) {
       assert(!Local);
@@ -1004,11 +643,16 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
     UndefinedSymbolData[i].SymbolData->setIndex(Index++);
 }
 
-void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
-                                      const MCSectionData &SD) {
-  if (!Relocations[&SD].empty()) {
+void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
+                                               MCAsmLayout &Layout,
+                                               RelMapTy &RelMap) {
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionData &SD = *it;
+    if (Relocations[&SD].empty())
+      continue;
+
     MCContext &Ctx = Asm.getContext();
-    const MCSectionELF *RelaSection;
     const MCSectionELF &Section =
       static_cast<const MCSectionELF&>(SD.getSection());
 
@@ -1022,17 +666,32 @@ void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
     else
       EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
 
-    RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
-                                    ELF::SHT_RELA : ELF::SHT_REL, 0,
-                                    SectionKind::getReadOnly(),
-                                    EntrySize, "");
+    const MCSectionELF *RelaSection =
+      Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
+                        ELF::SHT_RELA : ELF::SHT_REL, 0,
+                        SectionKind::getReadOnly(),
+                        EntrySize, "");
+    RelMap[&Section] = RelaSection;
+    Asm.getOrCreateSectionData(*RelaSection);
+  }
+}
 
+void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+                                       const RelMapTy &RelMap) {
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionData &SD = *it;
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF&>(SD.getSection());
+
+    const MCSectionELF *RelaSection = RelMap.lookup(&Section);
+    if (!RelaSection)
+      continue;
     MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
     RelaSD.setAlignment(is64Bit() ? 8 : 4);
 
     MCDataFragment *F = new MCDataFragment(&RelaSD);
-
-    WriteRelocationsFragment(Asm, F, &SD);
+    WriteRelocationsFragment(Asm, F, &*it);
   }
 }
 
@@ -1092,9 +751,28 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
   }
 }
 
+static int compareBySuffix(const void *a, const void *b) {
+  const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
+  const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
+  const StringRef &NameA = secA->getSectionName();
+  const StringRef &NameB = secB->getSectionName();
+  const unsigned sizeA = NameA.size();
+  const unsigned sizeB = NameB.size();
+  const unsigned len = std::min(sizeA, sizeB);
+  for (unsigned int i = 0; i < len; ++i) {
+    char ca = NameA[sizeA - i - 1];
+    char cb = NameB[sizeB - i - 1];
+    if (ca != cb)
+      return cb - ca;
+  }
+
+  return sizeB - sizeA;
+}
+
 void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
                                              MCAsmLayout &Layout,
-                                    const SectionIndexMapTy &SectionIndexMap) {
+                                             SectionIndexMapTy &SectionIndexMap,
+                                             const RelMapTy &RelMap) {
   MCContext &Ctx = Asm.getContext();
   MCDataFragment *F;
 
@@ -1106,7 +784,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
                       SectionKind::getReadOnly());
   MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
   ShstrtabSD.setAlignment(1);
-  ShstrtabIndex = Asm.size();
 
   const MCSectionELF *SymtabSection =
     Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
@@ -1114,7 +791,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
                       EntrySize, "");
   MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
   SymtabSD.setAlignment(is64Bit() ? 8 : 4);
-  SymbolTableIndex = Asm.size();
 
   MCSectionData *SymtabShndxSD = NULL;
 
@@ -1126,14 +802,17 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
     SymtabShndxSD->setAlignment(4);
   }
 
-  const MCSection *StrtabSection;
+  const MCSectionELF *StrtabSection;
   StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
                                     SectionKind::getReadOnly());
   MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
   StrtabSD.setAlignment(1);
-  StringTableIndex = Asm.size();
 
-  WriteRelocations(Asm, Layout);
+  ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
+  ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection);
+  SymbolTableIndex = SectionIndexMap.lookup(SymtabSection);
+  StringTableIndex = SectionIndexMap.lookup(StrtabSection);
 
   // Symbol table
   F = new MCDataFragment(&SymtabSD);
@@ -1148,6 +827,15 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
 
   F = new MCDataFragment(&ShstrtabSD);
 
+  std::vector<const MCSectionELF*> Sections;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF&>(it->getSection());
+    Sections.push_back(&Section);
+  }
+  array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
+
   // Section header string table.
   //
   // The first entry of a string table holds a null character so skip
@@ -1155,22 +843,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
   uint64_t Index = 1;
   F->getContents() += '\x00';
 
-  StringMap<uint64_t> SecStringMap;
-  for (MCAssembler::const_iterator it = Asm.begin(),
-         ie = Asm.end(); it != ie; ++it) {
-    const MCSectionELF &Section =
-      static_cast<const MCSectionELF&>(it->getSection());
-    // FIXME: We could merge suffixes like in .text and .rela.text.
+  for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
+    const MCSectionELF &Section = *Sections[I];
 
     StringRef Name = Section.getSectionName();
-    if (SecStringMap.count(Name)) {
-      SectionStringTableIndex[&Section] =  SecStringMap[Name];
-      continue;
+    if (I != 0) {
+      StringRef PreviousName = Sections[I - 1]->getSectionName();
+      if (PreviousName.endswith(Name)) {
+        SectionStringTableIndex[&Section] = Index - Name.size() - 1;
+        continue;
+      }
     }
     // Remember the index into the string table so we can write it
     // into the sh_name field of the section header table.
     SectionStringTableIndex[&Section] = Index;
-    SecStringMap[Name] = Index;
 
     Index += Name.size() + 1;
     F->getContents() += Name;
@@ -1181,7 +867,9 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
 void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
                                             MCAsmLayout &Layout,
                                             GroupMapTy &GroupMap,
-                                            RevGroupMapTy &RevGroupMap) {
+                                            RevGroupMapTy &RevGroupMap,
+                                            SectionIndexMapTy &SectionIndexMap,
+                                            const RelMapTy &RelMap) {
   // Create the .note.GNU-stack section if needed.
   MCContext &Ctx = Asm.getContext();
   if (Asm.getNoExecStack()) {
@@ -1212,11 +900,11 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
     GroupMap[Group] = SignatureSymbol;
   }
 
+  ComputeIndexMap(Asm, SectionIndexMap, RelMap);
+
   // Add sections to the groups
-  unsigned Index = 1;
-  unsigned NumGroups = RevGroupMap.size();
   for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
-       it != ie; ++it, ++Index) {
+       it != ie; ++it) {
     const MCSectionELF &Section =
       static_cast<const MCSectionELF&>(it->getSection());
     if (!(Section.getFlags() & ELF::SHF_GROUP))
@@ -1225,7 +913,8 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
     MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
     // FIXME: we could use the previous fragment
     MCDataFragment *F = new MCDataFragment(&Data);
-    String32(*F, NumGroups + Index);
+    unsigned Index = SectionIndexMap.lookup(&Section);
+    String32(*F, Index);
   }
 }
 
@@ -1304,12 +993,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
                    Alignment, Section.getEntrySize());
 }
 
-static bool IsELFMetaDataSection(const MCSectionData &SD) {
+bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) {
   return SD.getOrdinal() == ~UINT32_C(0) &&
     !SD.getSection().isVirtualSection();
 }
 
-static uint64_t DataSectionSize(const MCSectionData &SD) {
+uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
   uint64_t Ret = 0;
   for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
        ++i) {
@@ -1320,26 +1009,112 @@ static uint64_t DataSectionSize(const MCSectionData &SD) {
   return Ret;
 }
 
-static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
-                                   const MCSectionData &SD) {
+uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
+                                             const MCSectionData &SD) {
   if (IsELFMetaDataSection(SD))
     return DataSectionSize(SD);
   return Layout.getSectionFileSize(&SD);
 }
 
-static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
-                                      const MCSectionData &SD) {
+uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
+                                                const MCSectionData &SD) {
   if (IsELFMetaDataSection(SD))
     return DataSectionSize(SD);
   return Layout.getSectionAddressSize(&SD);
 }
 
-static void WriteDataSectionData(ELFObjectWriter *W, const MCSectionData &SD) {
-  for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
-       ++i) {
-    const MCFragment &F = *i;
-    assert(F.getKind() == MCFragment::FT_Data);
-    W->WriteBytes(cast<MCDataFragment>(F).getContents().str());
+void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
+                                           const MCAsmLayout &Layout,
+                                           const MCSectionELF &Section) {
+  uint64_t FileOff = OS.tell();
+  const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+
+  uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+  WriteZeros(Padding);
+  FileOff += Padding;
+
+  FileOff += GetSectionFileSize(Layout, SD);
+
+  if (IsELFMetaDataSection(SD)) {
+    for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+         ++i) {
+      const MCFragment &F = *i;
+      assert(F.getKind() == MCFragment::FT_Data);
+      WriteBytes(cast<MCDataFragment>(F).getContents().str());
+    }
+  } else {
+    Asm.WriteSectionData(&SD, Layout);
+  }
+}
+
+void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm,
+                                         const GroupMapTy &GroupMap,
+                                         const MCAsmLayout &Layout,
+                                      const SectionIndexMapTy &SectionIndexMap,
+                                   const SectionOffsetMapTy &SectionOffsetMap) {
+  const unsigned NumSections = Asm.size() + 1;
+
+  std::vector<const MCSectionELF*> Sections;
+  Sections.resize(NumSections - 1);
+
+  for (SectionIndexMapTy::const_iterator i=
+         SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+    const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+    Sections[p.second - 1] = p.first;
+  }
+
+  // Null section first.
+  uint64_t FirstSectionSize =
+    NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+  uint32_t FirstSectionLink =
+    ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+  WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+  for (unsigned i = 0; i < NumSections - 1; ++i) {
+    const MCSectionELF &Section = *Sections[i];
+    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+    uint32_t GroupSymbolIndex;
+    if (Section.getType() != ELF::SHT_GROUP)
+      GroupSymbolIndex = 0;
+    else
+      GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm,
+                                                     GroupMap.lookup(&Section));
+
+    uint64_t Size = GetSectionAddressSize(Layout, SD);
+
+    WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+                 SectionOffsetMap.lookup(&Section), Size,
+                 SD.getAlignment(), Section);
+  }
+}
+
+void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
+                                  std::vector<const MCSectionELF*> &Sections) {
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF &>(it->getSection());
+    if (Section.getType() == ELF::SHT_GROUP)
+      Sections.push_back(&Section);
+  }
+
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF &>(it->getSection());
+    if (Section.getType() != ELF::SHT_GROUP &&
+        Section.getType() != ELF::SHT_REL &&
+        Section.getType() != ELF::SHT_RELA)
+      Sections.push_back(&Section);
+  }
+
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF &>(it->getSection());
+    if (Section.getType() == ELF::SHT_REL ||
+        Section.getType() == ELF::SHT_RELA)
+      Sections.push_back(&Section);
   }
 }
 
@@ -1347,107 +1122,108 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
                                   const MCAsmLayout &Layout) {
   GroupMapTy GroupMap;
   RevGroupMapTy RevGroupMap;
-  CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
-                        RevGroupMap);
-
   SectionIndexMapTy SectionIndexMap;
 
-  ComputeIndexMap(Asm, SectionIndexMap);
+  unsigned NumUserSections = Asm.size();
+
+  DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
+  CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+
+  const unsigned NumUserAndRelocSections = Asm.size();
+  CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+                        RevGroupMap, SectionIndexMap, RelMap);
+  const unsigned AllSections = Asm.size();
+  const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
+
+  unsigned NumRegularSections = NumUserSections + NumIndexedSections;
 
   // Compute symbol table information.
-  ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap);
+  ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections);
+
+
+  WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
 
   CreateMetadataSections(const_cast<MCAssembler&>(Asm),
                          const_cast<MCAsmLayout&>(Layout),
-                         SectionIndexMap);
-
-  // Update to include the metadata sections.
-  ComputeIndexMap(Asm, SectionIndexMap);
+                         SectionIndexMap,
+                         RelMap);
 
-  // Add 1 for the null section.
-  unsigned NumSections = Asm.size() + 1;
   uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
   uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
                                     sizeof(ELF::Elf32_Ehdr);
   uint64_t FileOff = HeaderSize;
 
   std::vector<const MCSectionELF*> Sections;
-  Sections.resize(NumSections);
-
-  for (SectionIndexMapTy::const_iterator i=
-         SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
-    const std::pair<const MCSectionELF*, uint32_t> &p = *i;
-    Sections[p.second] = p.first;
-  }
-
-  for (unsigned i = 1; i < NumSections; ++i) {
+  ComputeSectionOrder(Asm, Sections);
+  unsigned NumSections = Sections.size();
+  SectionOffsetMapTy SectionOffsetMap;
+  for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
     const MCSectionELF &Section = *Sections[i];
     const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
 
     FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
 
+    // Remember the offset into the file for this section.
+    SectionOffsetMap[&Section] = FileOff;
+
     // Get the size of the section in the output file (including padding).
     FileOff += GetSectionFileSize(Layout, SD);
   }
 
   FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
 
-  // Write out the ELF header ...
-  WriteHeader(FileOff - HeaderSize, NumSections);
-
-  FileOff = HeaderSize;
+  const unsigned SectionHeaderOffset = FileOff - HeaderSize;
 
-  // ... then all of the sections ...
-  DenseMap<const MCSection*, uint64_t> SectionOffsetMap;
+  uint64_t SectionHeaderEntrySize = is64Bit() ?
+    sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
+  FileOff += (NumSections + 1) * SectionHeaderEntrySize;
 
-  for (unsigned i = 1; i < NumSections; ++i) {
+  for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
     const MCSectionELF &Section = *Sections[i];
     const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
 
-    uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
-    WriteZeros(Padding);
-    FileOff += Padding;
+    FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
 
     // Remember the offset into the file for this section.
     SectionOffsetMap[&Section] = FileOff;
 
+    // Get the size of the section in the output file (including padding).
     FileOff += GetSectionFileSize(Layout, SD);
-
-    if (IsELFMetaDataSection(SD))
-      WriteDataSectionData(this, SD);
-    else
-      Asm.WriteSectionData(&SD, Layout);
   }
 
+  // Write out the ELF header ...
+  WriteHeader(SectionHeaderOffset, NumSections + 1);
+
+  // ... then the regular sections ...
+  // + because of .shstrtab
+  for (unsigned i = 0; i < NumRegularSections + 1; ++i)
+    WriteDataSectionData(Asm, Layout, *Sections[i]);
+
+  FileOff = OS.tell();
   uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
   WriteZeros(Padding);
-  FileOff += Padding;
 
-  // ... and then the section header table.
-  // Should we align the section header table?
-  //
-  // Null section first.
-  uint64_t FirstSectionSize =
-    NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
-  uint32_t FirstSectionLink =
-    ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
-  WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+  // ... then the section header table ...
+  WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
+                     SectionOffsetMap);
 
-  for (unsigned i = 1; i < NumSections; ++i) {
-    const MCSectionELF &Section = *Sections[i];
-    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
-    uint32_t GroupSymbolIndex;
-    if (Section.getType() != ELF::SHT_GROUP)
-      GroupSymbolIndex = 0;
-    else
-      GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]);
+  FileOff = OS.tell();
 
-    uint64_t Size = GetSectionAddressSize(Layout, SD);
+  // ... and then the remainting sections ...
+  for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
+    WriteDataSectionData(Asm, Layout, *Sections[i]);
+}
 
-    WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
-                 SectionOffsetMap[&Section], Size,
-                 SD.getAlignment(), Section);
-  }
+bool
+ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                      const MCFragment &FB,
+                                                      bool InSet,
+                                                      bool IsPCRel) const {
+  if (DataA.getFlags() & ELF_STB_Weak)
+    return false;
+  return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+                                                 Asm, DataA, FB,InSet, IsPCRel);
 }
 
 MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
@@ -1700,13 +1476,17 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
     if (IsPCRel) {
       switch ((unsigned)Fixup.getKind()) {
       default: llvm_unreachable("invalid fixup kind!");
+
+      case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+      case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+      case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
       case FK_PCRel_8:
         assert(Modifier == MCSymbolRefExpr::VK_None);
         Type = ELF::R_X86_64_PC64;
         break;
       case X86::reloc_signed_4byte:
       case X86::reloc_riprel_4byte_movq_load:
-      case FK_Data_4: // FIXME?
       case X86::reloc_riprel_4byte:
       case FK_PCRel_4:
         switch (Modifier) {
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
new file mode 100644
index 0000000..f1d514a
--- /dev/null
+++ b/lib/MC/ELFObjectWriter.h
@@ -0,0 +1,406 @@
+//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_ELFOBJECTWRITER_H
+#define LLVM_MC_ELFOBJECTWRITER_H
+
+#include "MCELF.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+
+class MCSection;
+class MCDataFragment;
+class MCSectionELF;
+
+class ELFObjectWriter : public MCObjectWriter {
+  protected:
+
+    static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+    static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+    static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+    static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+                           bool Used, bool Renamed);
+    static bool isLocal(const MCSymbolData &Data, bool isSignature,
+                        bool isUsedInReloc);
+    static bool IsELFMetaDataSection(const MCSectionData &SD);
+    static uint64_t DataSectionSize(const MCSectionData &SD);
+    static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+                                       const MCSectionData &SD);
+    static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+                                          const MCSectionData &SD);
+
+    void WriteDataSectionData(MCAssembler &Asm,
+                              const MCAsmLayout &Layout,
+                              const MCSectionELF &Section);
+
+    /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+      return Kind == X86::reloc_riprel_4byte ||
+        Kind == X86::reloc_riprel_4byte_movq_load;
+    }*/
+
+    /// ELFSymbolData - Helper struct for containing some precomputed
+    /// information on symbols.
+    struct ELFSymbolData {
+      MCSymbolData *SymbolData;
+      uint64_t StringIndex;
+      uint32_t SectionIndex;
+
+      // Support lexicographic sorting.
+      bool operator<(const ELFSymbolData &RHS) const {
+        if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+          return true;
+        if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+          return false;
+        return SymbolData->getSymbol().getName() <
+               RHS.SymbolData->getSymbol().getName();
+      }
+    };
+
+    /// @name Relocation Data
+    /// @{
+
+    struct ELFRelocationEntry {
+      // Make these big enough for both 32-bit and 64-bit
+      uint64_t r_offset;
+      int Index;
+      unsigned Type;
+      const MCSymbol *Symbol;
+      uint64_t r_addend;
+
+      ELFRelocationEntry()
+        : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+      ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+                         unsigned RelType, const MCSymbol *Sym,
+                         uint64_t Addend)
+        : r_offset(RelocOffset), Index(Idx), Type(RelType),
+          Symbol(Sym), r_addend(Addend) {}
+
+      // Support lexicographic sorting.
+      bool operator<(const ELFRelocationEntry &RE) const {
+        return RE.r_offset < r_offset;
+      }
+    };
+
+    /// The target specific ELF writer instance.
+    llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+    SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+    SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+    DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+    llvm::DenseMap<const MCSectionData*,
+                   std::vector<ELFRelocationEntry> > Relocations;
+    DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+    /// @}
+    /// @name Symbol Table Data
+    /// @{
+
+    SmallString<256> StringTable;
+    std::vector<ELFSymbolData> LocalSymbolData;
+    std::vector<ELFSymbolData> ExternalSymbolData;
+    std::vector<ELFSymbolData> UndefinedSymbolData;
+
+    /// @}
+
+    bool NeedsGOT;
+
+    bool NeedsSymtabShndx;
+
+    // This holds the symbol table index of the last local symbol.
+    unsigned LastLocalSymbolIndex;
+    // This holds the .strtab section index.
+    unsigned StringTableIndex;
+    // This holds the .symtab section index.
+    unsigned SymbolTableIndex;
+
+    unsigned ShstrtabIndex;
+
+
+    const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+                                  const MCValue &Target,
+                                  const MCFragment &F) const;
+
+    // For arch-specific emission of explicit reloc symbol
+    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                           const MCValue &Target,
+                                           const MCFragment &F,
+                                           bool IsBSS) const {
+      return NULL;
+    }
+
+    bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+    bool hasRelocationAddend() const {
+      return TargetObjectWriter->hasRelocationAddend();
+    }
+
+  public:
+    ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                    raw_ostream &_OS, bool IsLittleEndian)
+      : MCObjectWriter(_OS, IsLittleEndian),
+        TargetObjectWriter(MOTW),
+        NeedsGOT(false), NeedsSymtabShndx(false){
+    }
+
+    virtual ~ELFObjectWriter();
+
+    void WriteWord(uint64_t W) {
+      if (is64Bit())
+        Write64(W);
+      else
+        Write32(W);
+    }
+
+    void StringLE16(char *buf, uint16_t Value) {
+      buf[0] = char(Value >> 0);
+      buf[1] = char(Value >> 8);
+    }
+
+    void StringLE32(char *buf, uint32_t Value) {
+      StringLE16(buf, uint16_t(Value >> 0));
+      StringLE16(buf + 2, uint16_t(Value >> 16));
+    }
+
+    void StringLE64(char *buf, uint64_t Value) {
+      StringLE32(buf, uint32_t(Value >> 0));
+      StringLE32(buf + 4, uint32_t(Value >> 32));
+    }
+
+    void StringBE16(char *buf ,uint16_t Value) {
+      buf[0] = char(Value >> 8);
+      buf[1] = char(Value >> 0);
+    }
+
+    void StringBE32(char *buf, uint32_t Value) {
+      StringBE16(buf, uint16_t(Value >> 16));
+      StringBE16(buf + 2, uint16_t(Value >> 0));
+    }
+
+    void StringBE64(char *buf, uint64_t Value) {
+      StringBE32(buf, uint32_t(Value >> 32));
+      StringBE32(buf + 4, uint32_t(Value >> 0));
+    }
+
+    void String8(MCDataFragment &F, uint8_t Value) {
+      char buf[1];
+      buf[0] = Value;
+      F.getContents() += StringRef(buf, 1);
+    }
+
+    void String16(MCDataFragment &F, uint16_t Value) {
+      char buf[2];
+      if (isLittleEndian())
+        StringLE16(buf, Value);
+      else
+        StringBE16(buf, Value);
+      F.getContents() += StringRef(buf, 2);
+    }
+
+    void String32(MCDataFragment &F, uint32_t Value) {
+      char buf[4];
+      if (isLittleEndian())
+        StringLE32(buf, Value);
+      else
+        StringBE32(buf, Value);
+      F.getContents() += StringRef(buf, 4);
+    }
+
+    void String64(MCDataFragment &F, uint64_t Value) {
+      char buf[8];
+      if (isLittleEndian())
+        StringLE64(buf, Value);
+      else
+        StringBE64(buf, Value);
+      F.getContents() += StringRef(buf, 8);
+    }
+
+    virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+    /// Default e_flags = 0
+    virtual void WriteEFlags() { Write32(0); }
+
+    virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                          uint64_t name, uint8_t info,
+                          uint64_t value, uint64_t size,
+                          uint8_t other, uint32_t shndx,
+                          bool Reserved);
+
+    virtual void WriteSymbol(MCDataFragment *SymtabF,  MCDataFragment *ShndxF,
+                     ELFSymbolData &MSD,
+                     const MCAsmLayout &Layout);
+
+    typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+    virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                          const MCAssembler &Asm,
+                          const MCAsmLayout &Layout,
+                          const SectionIndexMapTy &SectionIndexMap);
+
+    virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                                  const MCFragment *Fragment, const MCFixup &Fixup,
+                                  MCValue Target, uint64_t &FixedValue);
+
+    virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+                                         const MCSymbol *S);
+
+    // Map from a group section to the signature symbol
+    typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+    // Map from a signature symbol to the group section
+    typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+    // Map from a section to the section with the relocations
+    typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+    // Map from a section to its offset
+    typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+    /// ComputeSymbolTable - Compute the symbol table data
+    ///
+    /// \param StringTable [out] - The string table data.
+    /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+    /// string table.
+    virtual void ComputeSymbolTable(MCAssembler &Asm,
+                            const SectionIndexMapTy &SectionIndexMap,
+                                    RevGroupMapTy RevGroupMap,
+                                    unsigned NumRegularSections);
+
+    virtual void ComputeIndexMap(MCAssembler &Asm,
+                                 SectionIndexMapTy &SectionIndexMap,
+                                 const RelMapTy &RelMap);
+
+    void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                  RelMapTy &RelMap);
+
+    void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+                          const RelMapTy &RelMap);
+
+    virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                        SectionIndexMapTy &SectionIndexMap,
+                                        const RelMapTy &RelMap);
+
+    // Create the sections that show up in the symbol table. Currently
+    // those are the .note.GNU-stack section and the group sections.
+    virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                       GroupMapTy &GroupMap,
+                                       RevGroupMapTy &RevGroupMap,
+                                       SectionIndexMapTy &SectionIndexMap,
+                                       const RelMapTy &RelMap);
+
+    virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+                                          const MCAsmLayout &Layout);
+
+    void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+                            const MCAsmLayout &Layout,
+                            const SectionIndexMapTy &SectionIndexMap,
+                            const SectionOffsetMapTy &SectionOffsetMap);
+
+    void ComputeSectionOrder(MCAssembler &Asm,
+                             std::vector<const MCSectionELF*> &Sections);
+
+    virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+                          uint64_t Address, uint64_t Offset,
+                          uint64_t Size, uint32_t Link, uint32_t Info,
+                          uint64_t Alignment, uint64_t EntrySize);
+
+    virtual void WriteRelocationsFragment(const MCAssembler &Asm,
+                                          MCDataFragment *F,
+                                          const MCSectionData *SD);
+
+    virtual bool
+    IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                           const MCSymbolData &DataA,
+                                           const MCFragment &FB,
+                                           bool InSet,
+                                           bool IsPCRel) const;
+
+    virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+    virtual void WriteSection(MCAssembler &Asm,
+                      const SectionIndexMapTy &SectionIndexMap,
+                      uint32_t GroupSymbolIndex,
+                      uint64_t Offset, uint64_t Size, uint64_t Alignment,
+                      const MCSectionELF &Section);
+
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend) = 0;
+  };
+
+  //===- X86ELFObjectWriter -------------------------------------------===//
+
+  class X86ELFObjectWriter : public ELFObjectWriter {
+  public:
+    X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                       raw_ostream &_OS,
+                       bool IsLittleEndian);
+
+    virtual ~X86ELFObjectWriter();
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
+  };
+
+
+  //===- ARMELFObjectWriter -------------------------------------------===//
+
+  class ARMELFObjectWriter : public ELFObjectWriter {
+  public:
+    // FIXME: MCAssembler can't yet return the Subtarget,
+    enum { DefaultEABIVersion = 0x05000000U };
+
+    ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                       raw_ostream &_OS,
+                       bool IsLittleEndian);
+
+    virtual ~ARMELFObjectWriter();
+
+    virtual void WriteEFlags();
+  protected:
+    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                           const MCValue &Target,
+                                           const MCFragment &F,
+                                           bool IsBSS) const;
+
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
+  };
+
+  //===- MBlazeELFObjectWriter -------------------------------------------===//
+
+  class MBlazeELFObjectWriter : public ELFObjectWriter {
+  public:
+    MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                          raw_ostream &_OS,
+                          bool IsLittleEndian);
+
+    virtual ~MBlazeELFObjectWriter();
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
+  };
+}
+
+#endif
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 8199fb2..541dd08 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -13,7 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Dwarf.h"
 #include <cctype>
 #include <cstring>
 using namespace llvm;
@@ -26,7 +30,7 @@ MCAsmInfo::MCAsmInfo() {
   LinkerRequiresNonEmptyDwarfLines = false;
   MaxInstLength = 4;
   PCSymbol = "$";
-  SeparatorChar = ';';
+  SeparatorString = ";";
   CommentColumn = 40;
   CommentString = "#";
   LabelSuffix = ":";
@@ -106,3 +110,25 @@ unsigned MCAsmInfo::getSLEB128Size(int Value) {
   } while (IsMore);
   return Size;
 }
+
+const MCExpr *
+MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym,
+                                       unsigned Encoding,
+                                       MCStreamer &Streamer) const {
+  return getExprForFDESymbol(Sym, Encoding, Streamer);
+}
+
+const MCExpr *
+MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
+                               unsigned Encoding,
+                               MCStreamer &Streamer) const {
+  if (!(Encoding & dwarf::DW_EH_PE_pcrel))
+    return MCSymbolRefExpr::Create(Sym, Streamer.getContext());
+
+  MCContext &Context = Streamer.getContext();
+  const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
+  MCSymbol *PCSym = Context.CreateTempSymbol();
+  Streamer.EmitLabel(PCSym);
+  const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
+  return MCBinaryExpr::CreateSub(Res, PC, Context);
+}
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 526ad0d..4dd1d44 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -13,6 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
 using namespace llvm;
 
 MCAsmInfoDarwin::MCAsmInfoDarwin() {
@@ -56,4 +59,3 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
   DwarfUsesAbsoluteLabelForStmtList = false;
   DwarfUsesLabelOffsetForRanges = false;
 }
-
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 8d06982..9717c01 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -45,20 +45,25 @@ class MCAsmStreamer : public MCStreamer {
   unsigned IsVerboseAsm : 1;
   unsigned ShowInst : 1;
   unsigned UseLoc : 1;
+  unsigned UseCFI : 1;
+
+  enum EHSymbolFlags { EHGlobal         = 1,
+                       EHWeakDefinition = 1 << 1,
+                       EHPrivateExtern  = 1 << 2 };
+  DenseMap<const MCSymbol*, unsigned> FlagMap;
 
   bool needsSet(const MCExpr *Value);
 
 public:
   MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
-                bool isVerboseAsm,
-                bool useLoc,
+                bool isVerboseAsm, bool useLoc, bool useCFI,
                 MCInstPrinter *printer, MCCodeEmitter *emitter,
                 TargetAsmBackend *asmbackend,
                 bool showInst)
     : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
       InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
       CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
-      ShowInst(showInst), UseLoc(useLoc) {
+      ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) {
     if (InstPrinter && IsVerboseAsm)
       InstPrinter->setCommentStream(CommentStream);
   }
@@ -118,7 +123,8 @@ public:
   }
 
   virtual void EmitLabel(MCSymbol *Symbol);
-
+  virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+                                   MCSymbol *EHSymbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
   virtual void EmitThumbFunc(MCSymbol *Func);
 
@@ -127,6 +133,8 @@ public:
   virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                         const MCSymbol *LastLabel,
                                         const MCSymbol *Label);
+  virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                         const MCSymbol *Label);
 
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
 
@@ -154,13 +162,13 @@ public:
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
 
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             bool isPCRel, unsigned AddrSpace);
+                             unsigned AddrSpace);
   virtual void EmitIntValue(uint64_t Value, unsigned Size,
                             unsigned AddrSpace = 0);
 
-  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitULEB128Value(const MCExpr *Value);
 
-  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitSLEB128Value(const MCExpr *Value);
 
   virtual void EmitGPRel32Value(const MCExpr *Value);
 
@@ -182,15 +190,32 @@ public:
   virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
   virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                      unsigned Column, unsigned Flags,
-                                     unsigned Isa, unsigned Discriminator);
+                                     unsigned Isa, unsigned Discriminator,
+                                     StringRef FileName);
+
+  virtual void EmitCFIStartProc();
+  virtual void EmitCFIEndProc();
+  virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
+  virtual void EmitCFIDefCfaOffset(int64_t Offset);
+  virtual void EmitCFIDefCfaRegister(int64_t Register);
+  virtual void EmitCFIOffset(int64_t Register, int64_t Offset);
+  virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+  virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+  virtual void EmitCFIRememberState();
+  virtual void EmitCFIRestoreState();
+  virtual void EmitCFISameValue(int64_t Register);
+  virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
+  virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+
+  virtual void EmitFnStart();
+  virtual void EmitFnEnd();
+  virtual void EmitCantUnwind();
+  virtual void EmitPersonality(const MCSymbol *Personality);
+  virtual void EmitHandlerData();
+  virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+  virtual void EmitPad(int64_t Offset);
+  virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
 
-  virtual bool EmitCFIStartProc();
-  virtual bool EmitCFIEndProc();
-  virtual bool EmitCFIDefCfaOffset(int64_t Offset);
-  virtual bool EmitCFIDefCfaRegister(int64_t Register);
-  virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
-  virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
-  virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
 
   virtual void EmitInstruction(const MCInst &Inst);
 
@@ -259,14 +284,27 @@ void MCAsmStreamer::ChangeSection(const MCSection *Section) {
   Section->PrintSwitchToSection(MAI, OS);
 }
 
+void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+                                        MCSymbol *EHSymbol) {
+  if (UseCFI)
+    return;
+
+  unsigned Flags = FlagMap.lookup(Symbol);
+
+  if (Flags & EHGlobal)
+    EmitSymbolAttribute(EHSymbol, MCSA_Global);
+  if (Flags & EHWeakDefinition)
+    EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+  if (Flags & EHPrivateExtern)
+    EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
 void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
+  MCStreamer::EmitLabel(Symbol);
 
   OS << *Symbol << MAI.getLabelSuffix();
   EmitEOL();
-  Symbol->setSection(*getCurrentSection());
 }
 
 void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
@@ -309,6 +347,15 @@ void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                        getContext().getTargetAsmInfo().getPointerSize());
 }
 
+void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                              const MCSymbol *Label) {
+  EmitIntValue(dwarf::DW_CFA_advance_loc4, 1);
+  const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+  AddrDelta = ForceExpAbs(this, getContext(), AddrDelta);
+  EmitValue(AddrDelta, 4);
+}
+
+
 void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                         MCSymbolAttr Attribute) {
   switch (Attribute) {
@@ -337,6 +384,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     return;
   case MCSA_Global: // .globl/.global
     OS << MAI.getGlobalDirective();
+    FlagMap[Symbol] |= EHGlobal;
     break;
   case MCSA_Hidden:         OS << "\t.hidden\t";          break;
   case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
@@ -345,11 +393,17 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_Local:          OS << "\t.local\t";           break;
   case MCSA_NoDeadStrip:    OS << "\t.no_dead_strip\t";   break;
   case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
-  case MCSA_PrivateExtern:  OS << "\t.private_extern\t";  break;
+  case MCSA_PrivateExtern:
+    OS << "\t.private_extern\t";
+    FlagMap[Symbol] |= EHPrivateExtern;
+    break;
   case MCSA_Protected:      OS << "\t.protected\t";       break;
   case MCSA_Reference:      OS << "\t.reference\t";       break;
   case MCSA_Weak:           OS << "\t.weak\t";            break;
-  case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break;
+  case MCSA_WeakDefinition:
+    OS << "\t.weak_definition\t";
+    FlagMap[Symbol] |= EHWeakDefinition;
+    break;
       // .weak_reference
   case MCSA_WeakReference:  OS << MAI.getWeakRefDirective(); break;
   case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
@@ -512,9 +566,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
 }
 
 void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                  bool isPCRel, unsigned AddrSpace) {
+                                  unsigned AddrSpace) {
   assert(getCurrentSection() && "Cannot emit contents before setting section!");
-  assert(!isPCRel && "Cannot emit pc relative relocations!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
@@ -543,10 +596,10 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
   EmitEOL();
 }
 
-void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
   int64_t IntValue;
   if (Value->EvaluateAsAbsolute(IntValue)) {
-    EmitULEB128IntValue(IntValue, AddrSpace);
+    EmitULEB128IntValue(IntValue);
     return;
   }
   assert(MAI.hasLEB128() && "Cannot print a .uleb");
@@ -554,10 +607,10 @@ void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) {
   EmitEOL();
 }
 
-void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
   int64_t IntValue;
   if (Value->EvaluateAsAbsolute(IntValue)) {
-    EmitSLEB128IntValue(IntValue, AddrSpace);
+    EmitSLEB128IntValue(IntValue);
     return;
   }
   assert(MAI.hasLEB128() && "Cannot print a .sleb");
@@ -673,9 +726,10 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
 void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                           unsigned Column, unsigned Flags,
                                           unsigned Isa,
-                                          unsigned Discriminator) {
+                                          unsigned Discriminator,
+                                          StringRef FileName) {
   this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
-                                          Isa, Discriminator);
+                                          Isa, Discriminator, FileName);
   if (!UseLoc)
     return;
 
@@ -701,78 +755,144 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
     OS << "isa " << Isa;
   if (Discriminator)
     OS << "discriminator " << Discriminator;
+
+  if (IsVerboseAsm) {
+    OS.PadToColumn(MAI.getCommentColumn());
+    OS << MAI.getCommentString() << ' ' << FileName << ':' 
+       << Line << ':' << Column;
+  }
   EmitEOL();
 }
 
-bool MCAsmStreamer::EmitCFIStartProc() {
-  if (this->MCStreamer::EmitCFIStartProc())
-    return true;
+void MCAsmStreamer::EmitCFIStartProc() {
+  MCStreamer::EmitCFIStartProc();
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_startproc";
   EmitEOL();
-
-  return false;
 }
 
-bool MCAsmStreamer::EmitCFIEndProc() {
-  if (this->MCStreamer::EmitCFIEndProc())
-    return true;
+void MCAsmStreamer::EmitCFIEndProc() {
+  MCStreamer::EmitCFIEndProc();
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_endproc";
   EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+  MCStreamer::EmitCFIDefCfa(Register, Offset);
+
+  if (!UseCFI)
+    return;
 
-  return false;
+  OS << ".cfi_def_cfa " << Register << ", " << Offset;
+  EmitEOL();
 }
 
-bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
-  if (this->MCStreamer::EmitCFIDefCfaOffset(Offset))
-    return true;
+void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+  MCStreamer::EmitCFIDefCfaOffset(Offset);
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_def_cfa_offset " << Offset;
   EmitEOL();
-
-  return false;
 }
 
-bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
-  if (this->MCStreamer::EmitCFIDefCfaRegister(Register))
-    return true;
+void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+  MCStreamer::EmitCFIDefCfaRegister(Register);
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_def_cfa_register " << Register;
   EmitEOL();
-
-  return false;
 }
 
-bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
-  if (this->MCStreamer::EmitCFIOffset(Register, Offset))
-    return true;
+void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+  this->MCStreamer::EmitCFIOffset(Register, Offset);
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_offset " << Register << ", " << Offset;
   EmitEOL();
-
-  return false;
 }
 
-bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
                                        unsigned Encoding) {
-  if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding))
-    return true;
+  MCStreamer::EmitCFIPersonality(Sym, Encoding);
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
   EmitEOL();
-
-  return false;
 }
 
-bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
-  if (this->MCStreamer::EmitCFILsda(Sym, Encoding))
-    return true;
+void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+  MCStreamer::EmitCFILsda(Sym, Encoding);
+
+  if (!UseCFI)
+    return;
 
   OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
   EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRememberState() {
+  MCStreamer::EmitCFIRememberState();
+
+  if (!UseCFI)
+    return;
+
+  OS << "\t.cfi_remember_state";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRestoreState() {
+  MCStreamer::EmitCFIRestoreState();
+
+  if (!UseCFI)
+    return;
 
-  return false;
+  OS << "\t.cfi_restore_state";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
+  MCStreamer::EmitCFISameValue(Register);
+
+  if (!UseCFI)
+    return;
+
+  OS << "\t.cfi_same_value " << Register;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+  MCStreamer::EmitCFIRelOffset(Register, Offset);
+
+  if (!UseCFI)
+    return;
+
+  OS << "\t.cfi_rel_offset " << Register << ", " << Offset;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+  MCStreamer::EmitCFIAdjustCfaOffset(Adjustment);
+
+  if (!UseCFI)
+    return;
+
+  OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
+  EmitEOL();
 }
 
 void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
@@ -834,13 +954,13 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
       OS << "0b";
       for (unsigned j = 8; j--;) {
         unsigned Bit = (Code[i] >> j) & 1;
-        
+
         unsigned FixupBit;
         if (getContext().getTargetAsmInfo().isLittleEndian())
           FixupBit = i * 8 + j;
         else
           FixupBit = i * 8 + (7-j);
-        
+
         if (uint8_t MapEntry = FixupMap[FixupBit]) {
           assert(Bit == 0 && "Encoder wrote into fixed up bit!");
           OS << char('A' + MapEntry - 1);
@@ -859,12 +979,64 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
   }
 }
 
+void MCAsmStreamer::EmitFnStart() {
+  OS << "\t.fnstart";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitFnEnd() {
+  OS << "\t.fnend";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCantUnwind() {
+  OS << "\t.cantunwind";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitHandlerData() {
+  OS << "\t.handlerdata";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
+  OS << "\t.personality " << Personality->getName();
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+  OS << "\t.setfp\t" << InstPrinter->getRegName(FpReg)
+     << ", "        << InstPrinter->getRegName(SpReg);
+  if (Offset)
+    OS << ", #" << Offset;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitPad(int64_t Offset) {
+  OS << "\t.pad\t#" << Offset;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+                                bool isVector) {
+  assert(RegList.size() && "RegList should not be empty");
+  if (isVector)
+    OS << "\t.vsave\t{";
+  else
+    OS << "\t.save\t{";
+
+  OS << InstPrinter->getRegName(RegList[0]);
+
+  for (unsigned i = 1, e = RegList.size(); i != e; ++i)
+    OS << ", " << InstPrinter->getRegName(RegList[i]);
+
+  OS << "}";
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
   assert(getCurrentSection() && "Cannot emit contents before setting section!");
 
-  if (!UseLoc)
-    MCLineEntry::Make(this, getCurrentSection());
-
   // Show the encoding in a comment if we have a code emitter.
   if (Emitter)
     AddEncodingComment(Inst);
@@ -897,13 +1069,17 @@ void MCAsmStreamer::Finish() {
   // Dump out the dwarf file & directory tables and line tables.
   if (getContext().hasDwarfFiles() && !UseLoc)
     MCDwarfFileTable::Emit(this);
+
+  if (getNumFrameInfos() && !UseCFI)
+    MCDwarfFrameEmitter::Emit(*this, false);
 }
 
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
                                     bool isVerboseAsm, bool useLoc,
+                                    bool useCFI,
                                     MCInstPrinter *IP, MCCodeEmitter *CE,
                                     TargetAsmBackend *TAB, bool ShowInst) {
-  return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+  return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
                            IP, CE, TAB, ShowInst);
 }
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 9992646..8360fc9f 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -28,7 +28,6 @@
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmBackend.h"
 
-#include <vector>
 using namespace llvm;
 
 namespace {
@@ -103,6 +102,33 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
 }
 
 uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+  const MCSymbol &S = SD->getSymbol();
+
+  // If this is a variable, then recursively evaluate now.
+  if (S.isVariable()) {
+    MCValue Target;
+    if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this))
+      report_fatal_error("unable to evaluate offset for variable '" +
+                         S.getName() + "'");
+
+    // Verify that any used symbols are defined.
+    if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymA()->getSymbol().getName() + "'");
+    if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymB()->getSymbol().getName() + "'");
+      
+    uint64_t Offset = Target.getConstant();
+    if (Target.getSymA())
+      Offset += getSymbolOffset(&Assembler.getSymbolData(
+                                  Target.getSymA()->getSymbol()));
+    if (Target.getSymB())
+      Offset -= getSymbolOffset(&Assembler.getSymbolData(
+                                  Target.getSymB()->getSymbol()));
+    return Offset;
+  }
+
   assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
   return getFragmentOffset(SD->getFragment()) + SD->getOffset();
 }
@@ -692,7 +718,9 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
 bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
   int64_t Value = 0;
   uint64_t OldSize = LF.getContents().size();
-  LF.getValue().EvaluateAsAbsolute(Value, Layout);
+  bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
+  (void)IsAbs;
+  assert(IsAbs);
   SmallString<8> &Data = LF.getContents();
   Data.clear();
   raw_svector_ostream OSE(Data);
@@ -731,7 +759,8 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
   SmallString<8> &Data = DF.getContents();
   Data.clear();
   raw_svector_ostream OSE(Data);
-  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+  const TargetAsmInfo &AsmInfo = getContext().getTargetAsmInfo();
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE, AsmInfo);
   OSE.flush();
   return OldSize != Data.size();
 }
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 018f00c..8faa72e 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -27,8 +27,11 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
 
 
 MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
-  MAI(mai), TAI(tai), NextUniqueID(0),
-  CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) {
+  MAI(mai), TAI(tai),
+  Allocator(), Symbols(Allocator), UsedNames(Allocator),
+  NextUniqueID(0),
+  CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
+  AllowTemporaryLabels(true) {
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
   COFFUniquingMap = 0;
@@ -76,18 +79,19 @@ MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
 }
 
 MCSymbol *MCContext::CreateSymbol(StringRef Name) {
-  // Determine whether this is an assembler temporary or normal label.
-  bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+  // Determine whether this is an assembler temporary or normal label, if used.
+  bool isTemporary = false;
+  if (AllowTemporaryLabels)
+    isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
 
   StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
   if (NameEntry->getValue()) {
     assert(isTemporary && "Cannot rename non temporary symbols");
-    SmallString<128> NewName;
+    SmallString<128> NewName = Name;
     do {
-      Twine T = Name + Twine(NextUniqueID++);
-      T.toVector(NewName);
-      StringRef foo = NewName;
-      NameEntry = &UsedNames.GetOrCreateValue(foo);
+      NewName.resize(Name.size());
+      raw_svector_ostream(NewName) << NextUniqueID++;
+      NameEntry = &UsedNames.GetOrCreateValue(NewName);
     } while (NameEntry->getValue());
   }
   NameEntry->setValue(true);
@@ -107,9 +111,8 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
 
 MCSymbol *MCContext::CreateTempSymbol() {
   SmallString<128> NameSV;
-  Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" +
-    Twine(NextUniqueID++);
-  Name.toVector(NameSV);
+  raw_svector_ostream(NameSV)
+    << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
   return CreateSymbol(NameSV);
 }
 
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 5fa7b70..0ce359d 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -1,7 +1,8 @@
 
 add_llvm_library(LLVMMCDisassembler
+  Disassembler.cpp
   EDDisassembler.cpp
-  EDOperand.cpp
   EDInst.cpp
+  EDOperand.cpp
   EDToken.cpp
   )
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
new file mode 100644
index 0000000..ced57e8
--- /dev/null
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -0,0 +1,171 @@
+//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "Disassembler.h"
+#include <stdio.h>
+#include "llvm-c/Disassembler.h"
+
+#include <string>
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h"  // FIXME.
+#include "llvm/Target/TargetMachine.h"  // FIXME.
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+class Target;
+} // namespace llvm
+using namespace llvm;
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//
+// LLVMCreateDisasm() creates a disassembler for the TripleName.  Symbolic
+// disassembly is supported by passing a block of information in the DisInfo
+// parameter and specifing the TagType and call back functions as described in
+// the header llvm-c/Disassembler.h .  The pointer to the block and the 
+// functions can all be passed as NULL.  If successful this returns a
+// disassembler context if not it returns NULL.
+//
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
+                                      int TagType, LLVMOpInfoCallback GetOpInfo,
+                                      LLVMSymbolLookupCallback SymbolLookUp) {
+  // Initialize targets and assembly printers/parsers.
+  llvm::InitializeAllTargetInfos();
+  // FIXME: We shouldn't need to initialize the Target(Machine)s.
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
+
+  // Get the target.
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  assert(TheTarget && "Unable to create target!");
+
+  // Get the assembler info needed to setup the MCContext.
+  const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+  assert(MAI && "Unable to create target asm info!");
+
+  // Package up features to be passed to target/subtarget
+  std::string FeaturesStr;
+
+  // FIXME: We shouldn't need to do this (and link in codegen).
+  //        When we split this out, we should do it in a way that makes
+  //        it straightforward to switch subtargets on the fly.
+  TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr);
+  assert(TM && "Unable to create target machine!");
+
+  // Get the target assembler info needed to setup the context.
+  const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
+  assert(tai && "Unable to create target assembler!");
+
+  // Set up the MCContext for creating symbols and MCExpr's.
+  MCContext *Ctx = new MCContext(*MAI, tai);
+  assert(Ctx && "Unable to create MCContext!");
+
+  // Set up disassembler.
+  MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
+  assert(DisAsm && "Unable to create disassembler!");
+  DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx);
+
+  // Set up the instruction printer.
+  int AsmPrinterVariant = MAI->getAssemblerDialect();
+  MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant,
+                                                     *MAI);
+  assert(IP && "Unable to create instruction printer!");
+
+  LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
+                                                GetOpInfo, SymbolLookUp,
+                                                TheTarget, MAI, TM, tai, Ctx,
+                                                DisAsm, IP);
+  assert(DC && "Allocation failure!");
+  return DC;
+}
+
+//
+// LLVMDisasmDispose() disposes of the disassembler specified by the context.
+//
+void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
+  LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+  delete DC;
+}
+
+namespace {
+//
+// The memory object created by LLVMDisasmInstruction().
+//
+class DisasmMemoryObject : public MemoryObject {
+private:
+  uint8_t *Bytes;
+  uint64_t Size;
+  uint64_t BasePC;
+public:
+  DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
+                     Bytes(bytes), Size(size), BasePC(basePC) {}
+ 
+  uint64_t getBase() const { return BasePC; }
+  uint64_t getExtent() const { return Size; }
+
+  int readByte(uint64_t Addr, uint8_t *Byte) const {
+    if (Addr - BasePC >= Size)
+      return -1;
+    *Byte = Bytes[Addr - BasePC];
+    return 0;
+  }
+};
+} // namespace
+
+//
+// LLVMDisasmInstruction() disassembles a single instruction using the
+// disassembler context specified in the parameter DC.  The bytes of the
+// instruction are specified in the parameter Bytes, and contains at least
+// BytesSize number of bytes.  The instruction is at the address specified by
+// the PC parameter.  If a valid instruction can be disassembled its string is
+// returned indirectly in OutString which whos size is specified in the
+// parameter OutStringSize.  This function returns the number of bytes in the
+// instruction or zero if there was no valid instruction.  If this function
+// returns zero the caller will have to pick how many bytes they want to step
+// over by printing a .byte, .long etc. to continue.
+//
+size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
+                             uint64_t BytesSize, uint64_t PC, char *OutString,
+                             size_t OutStringSize){
+  LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+  // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
+  DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC);
+
+  uint64_t Size;
+  MCInst Inst;
+  const MCDisassembler *DisAsm = DC->getDisAsm();
+  MCInstPrinter *IP = DC->getIP();
+  if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls()))
+    return 0;
+
+  std::string InsnStr;
+  raw_string_ostream OS(InsnStr);
+  IP->printInst(&Inst, OS);
+  OS.flush();
+
+  size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
+  std::memcpy(OutString, InsnStr.data(), OutputSize);
+  OutString[OutputSize] = '\0'; // Terminate string.
+
+  return Size;
+}
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
new file mode 100644
index 0000000..f0ec42a
--- /dev/null
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -0,0 +1,96 @@
+//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Disassembly library's disassembler 
+// context.  The disassembler is responsible for producing strings for
+// individual instructions according to a given architecture and disassembly
+// syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_DISASSEMBLER_H
+#define LLVM_MC_DISASSEMBLER_H
+
+#include "llvm-c/Disassembler.h"
+#include <string>
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+class TargetAsmInfo;
+class MCContext;
+class MCAsmInfo;
+class MCDisassembler;
+class MCInstPrinter; 
+class Target;
+class TargetMachine;
+
+//
+// This is the disassembler context returned by LLVMCreateDisasm().
+//
+class LLVMDisasmContext {
+private:
+  //
+  // The passed parameters when the disassembler context is created.
+  //
+  // The TripleName for this disassembler.
+  std::string TripleName;
+  // The pointer to the caller's block of symbolic information.
+  void *DisInfo;
+  // The Triple specific symbolic information type returned by GetOpInfo.
+  int TagType;
+  // The function to get the symbolic information for operands.
+  LLVMOpInfoCallback GetOpInfo;
+  // The function to look up a symbol name.
+  LLVMSymbolLookupCallback SymbolLookUp;
+  //
+  // The objects created and saved by LLVMCreateDisasm() then used by
+  // LLVMDisasmInstruction().
+  //
+  // The LLVM target corresponding to the disassembler.
+  // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error
+  //        when this LLVMDisasmContext is deleted.
+  const Target *TheTarget;
+  // The assembly information for the target architecture.
+  llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
+  // The target machine instance.
+  llvm::OwningPtr<llvm::TargetMachine> TM;
+  // The disassembler for the target architecture.
+  // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc
+  //        error when this LLVMDisasmContext is deleted.
+  const TargetAsmInfo *Tai;
+  // The assembly context for creating symbols and MCExprs.
+  llvm::OwningPtr<const llvm::MCContext> Ctx;
+  // The disassembler for the target architecture.
+  llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
+  // The instruction printer for the target architecture.
+  llvm::OwningPtr<llvm::MCInstPrinter> IP;
+
+public:
+  LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
+                    LLVMOpInfoCallback getOpInfo,
+                    LLVMSymbolLookupCallback symbolLookUp,
+                    const Target *theTarget, const MCAsmInfo *mAI,
+                    llvm::TargetMachine *tM, const TargetAsmInfo *tai,
+                    llvm::MCContext *ctx, const MCDisassembler *disAsm,
+                    MCInstPrinter *iP) : TripleName(tripleName),
+                    DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
+                    SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) {
+    TM.reset(tM);
+    MAI.reset(mAI);
+    Ctx.reset(ctx);
+    DisAsm.reset(disAsm);
+    IP.reset(iP);
+  }
+  const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
+  MCInstPrinter *getIP() { return IP.get(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 2fd14db..91c5284 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -193,7 +193,8 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
   
   InstString.reset(new std::string);
   InstStream.reset(new raw_string_ostream(*InstString));
-  InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+  InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant,
+                                             *AsmInfo));
   
   if (!InstPrinter)
     return;
@@ -253,9 +254,11 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
     delete inst;
     return NULL;
   } else {
-    const llvm::EDInstInfo *thisInstInfo;
+    const llvm::EDInstInfo *thisInstInfo = NULL;
 
-    thisInstInfo = &InstInfos[inst->getOpcode()];
+    if (InstInfos) {
+      thisInstInfo = &InstInfos[inst->getOpcode()];
+    }
     
     EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
     return sdInst;
@@ -331,6 +334,15 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) {
   return 0;
 }
 
+static void diag_handler(const SMDiagnostic &diag,
+                         void *context)
+{
+  if (context) {
+    EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
+    diag.Print("", disassembler->ErrorStream);
+  }
+}
+
 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
                               SmallVectorImpl<AsmToken> &tokens,
                               const std::string &str) {
@@ -353,6 +365,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
   SMLoc instLoc;
   
   SourceMgr sourceMgr;
+  sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
   MCContext context(*AsmInfo, NULL);
   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 71e45f0..2fcc09d 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -87,13 +87,8 @@ struct EDDisassembler {
     
     /// operator< - Less-than operator
     bool operator<(const CPUKey &key) const {
-      if(Arch > key.Arch)
-        return false;
-      else if (Arch == key.Arch) {
-        if(Syntax > key.Syntax)
-          return false;
-      }
-      return true;
+      return ((Arch < key.Arch) ||
+              ((Arch == key.Arch) && Syntax < (key.Syntax)));
     }
   };
   
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
index 627c066..ad57282 100644
--- a/lib/MC/MCDisassembler/EDInfo.h
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -35,6 +35,7 @@ enum OperandTypes {
   kOperandTypeARMAddrMode5,
   kOperandTypeARMAddrMode6,
   kOperandTypeARMAddrMode6Offset,
+  kOperandTypeARMAddrMode7,
   kOperandTypeARMAddrModePC,
   kOperandTypeARMRegisterList,
   kOperandTypeARMTBAddrMode,
@@ -51,7 +52,8 @@ enum OperandTypes {
   kOperandTypeThumb2AddrModeImm12,
   kOperandTypeThumb2AddrModeSoReg,
   kOperandTypeThumb2AddrModeImm8s4,
-  kOperandTypeThumb2AddrModeImm8s4Offset
+  kOperandTypeThumb2AddrModeImm8s4Offset,
+  kOperandTypeThumb2AddrModeReg
 };
 
 enum OperandFlags {
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
index 63b049f..6057e16 100644
--- a/lib/MC/MCDisassembler/EDInst.cpp
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -165,6 +165,9 @@ int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
 int EDInst::tokenize() {
   if (TokenizeResult.valid())
     return TokenizeResult.result();
+    
+  if (ThisInstInfo == NULL)
+    return TokenizeResult.setResult(-1);
   
   if (stringify())
     return TokenizeResult.setResult(-1);
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 2b0c73e..492bb08 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -73,6 +73,8 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
     case kOperandTypeThumb2AddrModeImm8Offset:
     case kOperandTypeARMTBAddrMode:
     case kOperandTypeThumb2AddrModeImm8s4Offset:
+    case kOperandTypeARMAddrMode7:
+    case kOperandTypeThumb2AddrModeReg:
       numMCOperands = 1;
       break;
     case kOperandTypeThumb2SoReg:
@@ -196,15 +198,24 @@ int EDOperand::evaluate(uint64_t &result,
     default:
       return -1;
     case kOperandTypeImmediate:
+      if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+        return -1;
+            
       result = Inst.Inst->getOperand(MCOpIndex).getImm();
       return 0;
     case kOperandTypeRegister:
     {
+      if (!Inst.Inst->getOperand(MCOpIndex).isReg())
+        return -1;
+        
       unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
       return callback(&result, reg, arg);
     }
     case kOperandTypeARMBranchTarget:
     {
+      if (!Inst.Inst->getOperand(MCOpIndex).isImm())
+        return -1;
+        
       int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
       
       uint64_t pcVal;
@@ -256,6 +267,7 @@ int EDOperand::isMemory() {
   case kOperandTypeARMAddrMode4:
   case kOperandTypeARMAddrMode5:
   case kOperandTypeARMAddrMode6:
+  case kOperandTypeARMAddrMode7:
   case kOperandTypeARMAddrModePC:
   case kOperandTypeARMBranchTarget:
   case kOperandTypeThumbAddrModeS1:
@@ -269,6 +281,7 @@ int EDOperand::isMemory() {
   case kOperandTypeThumb2AddrModeImm12:
   case kOperandTypeThumb2AddrModeSoReg:
   case kOperandTypeThumb2AddrModeImm8s4:
+  case kOperandTypeThumb2AddrModeReg:
     return 1;
   }
 }
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 112d7d8..f61f0c2 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -17,6 +17,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -439,14 +440,100 @@ static int getDataAlignmentFactor(MCStreamer &streamer) {
    return -size;
 }
 
-static void EmitCFIInstruction(MCStreamer &Streamer,
-                               const MCCFIInstruction &Instr) {
+static unsigned getSizeForEncoding(MCStreamer &streamer,
+                                   unsigned symbolEncoding) {
+  MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  unsigned format = symbolEncoding & 0x0f;
+  switch (format) {
+  default:
+    assert(0 && "Unknown Encoding");
+  case dwarf::DW_EH_PE_absptr:
+  case dwarf::DW_EH_PE_signed:
+    return asmInfo.getPointerSize();
+  case dwarf::DW_EH_PE_udata2:
+  case dwarf::DW_EH_PE_sdata2:
+    return 2;
+  case dwarf::DW_EH_PE_udata4:
+  case dwarf::DW_EH_PE_sdata4:
+    return 4;
+  case dwarf::DW_EH_PE_udata8:
+  case dwarf::DW_EH_PE_sdata8:
+    return 8;
+  }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+                       unsigned symbolEncoding) {
+  MCContext &context = streamer.getContext();
+  const MCAsmInfo &asmInfo = context.getAsmInfo();
+  const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
+                                                symbolEncoding,
+                                                streamer);
+  unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+  streamer.EmitAbsValue(v, size);
+}
+
+static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
+                            unsigned symbolEncoding) {
+  MCContext &context = streamer.getContext();
+  const MCAsmInfo &asmInfo = context.getAsmInfo();
+  const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol,
+                                                        symbolEncoding,
+                                                        streamer);
+  unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+  streamer.EmitValue(v, size);
+}
+
+static const MachineLocation TranslateMachineLocation(
+                                                  const TargetAsmInfo &AsmInfo,
+                                                  const MachineLocation &Loc) {
+  unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+    MachineLocation::VirtualFP :
+    unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
+  const MachineLocation &NewLoc = Loc.isReg() ?
+    MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+  return NewLoc;
+}
+
+namespace {
+  class FrameEmitterImpl {
+    int CFAOffset;
+    int CIENum;
+    bool UsingCFI;
+
+  public:
+    FrameEmitterImpl(bool usingCFI) : CFAOffset(0), CIENum(0),
+                     UsingCFI(usingCFI) {
+    }
+
+    const MCSymbol &EmitCIE(MCStreamer &streamer,
+                            const MCSymbol *personality,
+                            unsigned personalityEncoding,
+                            const MCSymbol *lsda,
+                            unsigned lsdaEncoding);
+    MCSymbol *EmitFDE(MCStreamer &streamer,
+                      const MCSymbol &cieStart,
+                      const MCDwarfFrameInfo &frame,
+                      bool forceLsda);
+    void EmitCFIInstructions(MCStreamer &streamer,
+                             const std::vector<MCCFIInstruction> &Instrs,
+                             MCSymbol *BaseLabel);
+    void EmitCFIInstruction(MCStreamer &Streamer,
+                            const MCCFIInstruction &Instr);
+  };
+}
+
+void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
+                                          const MCCFIInstruction &Instr) {
   int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
 
   switch (Instr.getOperation()) {
-  case MCCFIInstruction::Move: {
+  case MCCFIInstruction::Move:
+  case MCCFIInstruction::RelMove: {
     const MachineLocation &Dst = Instr.getDestination();
     const MachineLocation &Src = Instr.getSource();
+    const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove;
 
     // If advancing cfa.
     if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
@@ -459,7 +546,12 @@ static void EmitCFIInstruction(MCStreamer &Streamer,
         Streamer.EmitULEB128IntValue(Src.getReg());
       }
 
-      Streamer.EmitULEB128IntValue(-Src.getOffset(), 1);
+      if (IsRelative)
+        CFAOffset += Src.getOffset();
+      else
+        CFAOffset = -Src.getOffset();
+
+      Streamer.EmitULEB128IntValue(CFAOffset);
       return;
     }
 
@@ -471,7 +563,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer,
     }
 
     unsigned Reg = Src.getReg();
-    int Offset = Dst.getOffset() / dataAlignmentFactor;
+
+    int Offset = Dst.getOffset();
+    if (IsRelative)
+      Offset -= CFAOffset;
+    Offset = Offset / dataAlignmentFactor;
 
     if (Offset < 0) {
       Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
@@ -479,11 +575,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer,
       Streamer.EmitSLEB128IntValue(Offset);
     } else if (Reg < 64) {
       Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
-      Streamer.EmitULEB128IntValue(Offset, 1);
+      Streamer.EmitULEB128IntValue(Offset);
     } else {
       Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
-      Streamer.EmitULEB128IntValue(Reg, 1);
-      Streamer.EmitULEB128IntValue(Offset, 1);
+      Streamer.EmitULEB128IntValue(Reg);
+      Streamer.EmitULEB128IntValue(Offset);
     }
     return;
   }
@@ -493,15 +589,21 @@ static void EmitCFIInstruction(MCStreamer &Streamer,
   case MCCFIInstruction::Restore:
     Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
     return;
+  case MCCFIInstruction::SameValue: {
+    unsigned Reg = Instr.getDestination().getReg();
+    Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+    Streamer.EmitULEB128IntValue(Reg);
+    return;
+  }
   }
   llvm_unreachable("Unhandled case in switch");
 }
 
 /// EmitFrameMoves - Emit frame instructions to describe the layout of the
 /// frame.
-static void EmitCFIInstructions(MCStreamer &streamer,
-                                const std::vector<MCCFIInstruction> &Instrs,
-                                MCSymbol *BaseLabel) {
+void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
+                                    const std::vector<MCCFIInstruction> &Instrs,
+                                           MCSymbol *BaseLabel) {
   for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
     const MCCFIInstruction &Instr = Instrs[i];
     MCSymbol *Label = Instr.getLabel();
@@ -521,74 +623,31 @@ static void EmitCFIInstructions(MCStreamer &streamer,
   }
 }
 
-static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
-                       unsigned symbolEncoding) {
-  MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
-  unsigned format = symbolEncoding & 0x0f;
-  unsigned application = symbolEncoding & 0x70;
-  unsigned size;
-  switch (format) {
-  default:
-    assert(0 && "Unknown Encoding");
-  case dwarf::DW_EH_PE_absptr:
-  case dwarf::DW_EH_PE_signed:
-    size = asmInfo.getPointerSize();
-    break;
-  case dwarf::DW_EH_PE_udata2:
-  case dwarf::DW_EH_PE_sdata2:
-    size = 2;
-    break;
-  case dwarf::DW_EH_PE_udata4:
-  case dwarf::DW_EH_PE_sdata4:
-    size = 4;
-    break;
-  case dwarf::DW_EH_PE_udata8:
-  case dwarf::DW_EH_PE_sdata8:
-    size = 8;
-    break;
-  }
-  switch (application) {
-  default:
-    assert(0 && "Unknown Encoding");
-    break;
-  case 0:
-    streamer.EmitSymbolValue(&symbol, size);
-    break;
-  case dwarf::DW_EH_PE_pcrel:
-    streamer.EmitPCRelSymbolValue(&symbol, size);
-    break;
-  }
-}
-
-static const MachineLocation TranslateMachineLocation(
-                                                  const TargetAsmInfo &AsmInfo,
-                                                  const MachineLocation &Loc) {
-  unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
-    MachineLocation::VirtualFP :
-    unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
-  const MachineLocation &NewLoc = Loc.isReg() ?
-    MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
-  return NewLoc;
-}
-
-static const MCSymbol &EmitCIE(MCStreamer &streamer,
-                               const MCSymbol *personality,
-                               unsigned personalityEncoding,
-                               const MCSymbol *lsda,
-                               unsigned lsdaEncoding) {
+const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
+                                          const MCSymbol *personality,
+                                          unsigned personalityEncoding,
+                                          const MCSymbol *lsda,
+                                          unsigned lsdaEncoding) {
   MCContext &context = streamer.getContext();
   const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
   const MCSection &section = *asmInfo.getEHFrameSection();
   streamer.SwitchSection(&section);
-  MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol();
+
+  MCSymbol *sectionStart;
+  if (asmInfo.isFunctionEHFrameSymbolPrivate())
+    sectionStart = context.CreateTempSymbol();
+  else
+    sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
+
+  CIENum++;
+
   MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol();
 
   // Length
   const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
                                                *sectionEnd, 4);
   streamer.EmitLabel(sectionStart);
-  streamer.EmitValue(Length, 4);
+  streamer.EmitAbsValue(Length, 4);
 
   // CIE ID
   streamer.EmitIntValue(0, 4);
@@ -617,28 +676,35 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer,
   streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true));
 
   // Augmentation Data Length (optional)
-  MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
-  MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
-  const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
-                                                           *augmentationStart,
-                                                           *augmentationEnd, 0);
-  streamer.EmitULEB128Value(augmentationLength);
+
+  unsigned augmentationLength = 0;
+  if (personality) {
+    // Personality Encoding
+    augmentationLength += 1;
+    // Personality
+    augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+  }
+  if (lsda) {
+    augmentationLength += 1;
+  }
+  // Encoding of the FDE pointers
+  augmentationLength += 1;
+
+  streamer.EmitULEB128IntValue(augmentationLength);
 
   // Augmentation Data (optional)
-  streamer.EmitLabel(augmentationStart);
   if (personality) {
     // Personality Encoding
     streamer.EmitIntValue(personalityEncoding, 1);
     // Personality
-    EmitSymbol(streamer, *personality, personalityEncoding);
+    EmitPersonality(streamer, *personality, personalityEncoding);
   }
   if (lsda) {
     // LSDA Encoding
     streamer.EmitIntValue(lsdaEncoding, 1);
   }
   // Encoding of the FDE pointers
-  streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
-  streamer.EmitLabel(augmentationEnd);
+  streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1);
 
   // Initial Instructions
 
@@ -664,50 +730,66 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer,
   return *sectionStart;
 }
 
-static MCSymbol *EmitFDE(MCStreamer &streamer,
-                         const MCSymbol &cieStart,
-                         const MCDwarfFrameInfo &frame) {
+MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
+                                    const MCSymbol &cieStart,
+                                    const MCDwarfFrameInfo &frame,
+                                    bool forceLsda) {
   MCContext &context = streamer.getContext();
   MCSymbol *fdeStart = context.CreateTempSymbol();
   MCSymbol *fdeEnd = context.CreateTempSymbol();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+
+  if (!asmInfo.isFunctionEHFrameSymbolPrivate()) {
+    Twine EHName = frame.Function->getName() + Twine(".eh");
+    MCSymbol *EHSym = context.GetOrCreateSymbol(EHName);
+    streamer.EmitEHSymAttributes(frame.Function, EHSym);
+    streamer.EmitLabel(EHSym);
+  }
 
   // Length
   const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
-  streamer.EmitValue(Length, 4);
+  streamer.EmitAbsValue(Length, 4);
 
   streamer.EmitLabel(fdeStart);
   // CIE Pointer
   const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
                                                0);
-  streamer.EmitValue(offset, 4);
+  streamer.EmitAbsValue(offset, 4);
+  unsigned fdeEncoding = asmInfo.getFDEEncoding(UsingCFI);
+  unsigned size = getSizeForEncoding(streamer, fdeEncoding);
 
   // PC Begin
-  streamer.EmitPCRelSymbolValue(frame.Begin, 4);
+  EmitSymbol(streamer, *frame.Begin, fdeEncoding);
 
   // PC Range
   const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
                                               *frame.End, 0);
-  streamer.EmitValue(Range, 4);
+  streamer.EmitAbsValue(Range, size);
 
   // Augmentation Data Length
-  MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
-  MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
-  const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
-                                                           *augmentationStart,
-                                                           *augmentationEnd, 0);
-  streamer.EmitULEB128Value(augmentationLength);
+  unsigned augmentationLength = 0;
+
+  if (frame.Lsda || forceLsda)
+    augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+
+  streamer.EmitULEB128IntValue(augmentationLength);
 
   // Augmentation Data
-  streamer.EmitLabel(augmentationStart);
+
+  // When running in "CodeGen compatibility mode" a FDE with no LSDA can be
+  // assigned to a CIE that requires one. In that case we output a 0 (as does
+  // CodeGen).
   if (frame.Lsda)
     EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding);
-  streamer.EmitLabel(augmentationEnd);
+  else if (forceLsda)
+    streamer.EmitIntValue(0, getSizeForEncoding(streamer, frame.LsdaEncoding));
+
   // Call Frame Instructions
 
   EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
 
   // Padding
-  streamer.EmitValueToAlignment(4);
+  streamer.EmitValueToAlignment(size);
 
   return fdeEnd;
 }
@@ -753,11 +835,78 @@ namespace llvm {
   };
 }
 
-void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) {
+// This is an implementation of CIE and FDE emission that is bug by bug
+// compatible with the one in CodeGen. It is useful during the transition
+// to make it easy to compare the outputs, but should probably be removed
+// afterwards.
+void MCDwarfFrameEmitter::EmitDarwin(MCStreamer &streamer,
+                                     bool usingCFI) {
+  FrameEmitterImpl Emitter(usingCFI);
+  DenseMap<const MCSymbol*, const MCSymbol*> Personalities;
+  const MCSymbol *aCIE = NULL;
+  const MCDwarfFrameInfo *aFrame = NULL;
+
+  for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
+    const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
+    if (!frame.Personality)
+      continue;
+    if (Personalities.count(frame.Personality))
+      continue;
+
+    const MCSymbol *cieStart = &Emitter.EmitCIE(streamer, frame.Personality,
+                                                frame.PersonalityEncoding,
+                                                frame.Lsda,
+                                                frame.LsdaEncoding);
+    aCIE = cieStart;
+    aFrame = &frame;
+    Personalities[frame.Personality] = cieStart;
+  }
+
+  if (Personalities.empty()) {
+    const MCDwarfFrameInfo &frame = streamer.getFrameInfo(0);
+    aCIE = &Emitter.EmitCIE(streamer, frame.Personality,
+                            frame.PersonalityEncoding, frame.Lsda,
+                            frame.LsdaEncoding);
+    aFrame = &frame;
+  }
+
+  MCSymbol *fdeEnd = NULL;
+  for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
+    const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
+    const MCSymbol *cieStart = Personalities[frame.Personality];
+    bool hasLSDA;
+    if (!cieStart) {
+      cieStart = aCIE;
+      hasLSDA = aFrame->Lsda;
+    } else {
+      hasLSDA = true;
+    }
+
+    fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame,
+                             hasLSDA);
+    if (i != n - 1)
+      streamer.EmitLabel(fdeEnd);
+  }
+
+  const MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  streamer.EmitValueToAlignment(asmInfo.getPointerSize());
+  if (fdeEnd)
+    streamer.EmitLabel(fdeEnd);
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &streamer,
+                               bool usingCFI) {
   const MCContext &context = streamer.getContext();
   const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  if (!asmInfo.isFunctionEHFrameSymbolPrivate()) {
+    EmitDarwin(streamer, usingCFI);
+    return;
+  }
+
   MCSymbol *fdeEnd = NULL;
   DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+  FrameEmitterImpl Emitter(usingCFI);
 
   for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
     const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
@@ -765,10 +914,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) {
                frame.LsdaEncoding);
     const MCSymbol *&cieStart = CIEStarts[key];
     if (!cieStart)
-      cieStart = &EmitCIE(streamer, frame.Personality,
-                          frame.PersonalityEncoding, frame.Lsda,
-                          frame.LsdaEncoding);
-    fdeEnd = EmitFDE(streamer, *cieStart, frame);
+      cieStart = &Emitter.EmitCIE(streamer, frame.Personality,
+                                  frame.PersonalityEncoding, frame.Lsda,
+                                  frame.LsdaEncoding);
+    fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame, false);
     if (i != n - 1)
       streamer.EmitLabel(fdeEnd);
   }
@@ -782,21 +931,28 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
                                          uint64_t AddrDelta) {
   SmallString<256> Tmp;
   raw_svector_ostream OS(Tmp);
-  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+  const TargetAsmInfo &AsmInfo = Streamer.getContext().getTargetAsmInfo();
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS, AsmInfo);
   Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
 }
 
 void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
-                                           raw_ostream &OS) {
+                                           raw_ostream &OS,
+                                           const TargetAsmInfo &AsmInfo) {
+  // This is a small hack to facilitate the transition to CFI on OS X. It
+  // relaxes all address advances which lets us produces identical output
+  // to the one produce by CodeGen.
+  const bool Relax = !AsmInfo.isFunctionEHFrameSymbolPrivate();
+
   // FIXME: Assumes the code alignment factor is 1.
   if (AddrDelta == 0) {
-  } else if (isUIntN(6, AddrDelta)) {
+  } else if (isUIntN(6, AddrDelta) && !Relax) {
     uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
     OS << Opcode;
-  } else if (isUInt<8>(AddrDelta)) {
+  } else if (isUInt<8>(AddrDelta) && !Relax) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc1);
     OS << uint8_t(AddrDelta);
-  } else if (isUInt<16>(AddrDelta)) {
+  } else if (isUInt<16>(AddrDelta) && !Relax) {
     // FIXME: check what is the correct behavior on a big endian machine.
     OS << uint8_t(dwarf::DW_CFA_advance_loc2);
     OS << uint8_t( AddrDelta       & 0xff);
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
new file mode 100644
index 0000000..ce7783e
--- /dev/null
+++ b/lib/MC/MCELF.cpp
@@ -0,0 +1,72 @@
+//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+namespace llvm {
+
+void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+  SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+unsigned MCELF::GetBinding(const MCSymbolData &SD) {
+  uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  return Binding;
+}
+
+void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
+  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+         Type == ELF::STT_TLS);
+
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+  SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+unsigned MCELF::GetType(const MCSymbolData &SD) {
+  uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+         Type == ELF::STT_TLS);
+  return Type;
+}
+
+void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift);
+  SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
+unsigned MCELF::GetVisibility(MCSymbolData &SD) {
+  unsigned Visibility =
+    (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift;
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+  return Visibility;
+}
+
+}
diff --git a/lib/MC/MCELF.h b/lib/MC/MCELF.h
new file mode 100644
index 0000000..e08f1e6
--- /dev/null
+++ b/lib/MC/MCELF.h
@@ -0,0 +1,35 @@
+//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some support functions used by the ELF Streamer and
+// ObjectWriter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELF_H
+#define LLVM_MC_MCELF_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCSymbolData;
+
+class MCELF {
+ public:
+  static void SetBinding(MCSymbolData &SD, unsigned Binding);
+  static unsigned GetBinding(const MCSymbolData &SD);
+  static void SetType(MCSymbolData &SD, unsigned Type);
+  static unsigned GetType(const MCSymbolData &SD);
+  static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
+  static unsigned GetVisibility(MCSymbolData &SD);
+};
+
+}
+
+#endif
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index e49074d..be8e2e3 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,18 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCELFStreamer.h"
+#include "MCELF.h"
 #include "llvm/MC/MCStreamer.h"
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
@@ -34,148 +30,6 @@
 
 using namespace llvm;
 
-namespace {
-
-static void SetBinding(MCSymbolData &SD, unsigned Binding) {
-  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
-         Binding == ELF::STB_WEAK);
-  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
-  SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
-}
-
-static unsigned GetBinding(const MCSymbolData &SD) {
-  uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
-  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
-         Binding == ELF::STB_WEAK);
-  return Binding;
-}
-
-static void SetType(MCSymbolData &SD, unsigned Type) {
-  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
-         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
-         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
-         Type == ELF::STT_TLS);
-
-  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
-  SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
-}
-
-static void SetVisibility(MCSymbolData &SD, unsigned Visibility) {
-  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
-         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
-
-  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift);
-  SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
-}
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
-  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
-                  raw_ostream &OS, MCCodeEmitter *Emitter)
-    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
-  ~MCELFStreamer() {}
-
-  /// @name MCStreamer Interface
-  /// @{
-
-  virtual void InitSections();
-  virtual void ChangeSection(const MCSection *Section);
-  virtual void EmitLabel(MCSymbol *Symbol);
-  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-  virtual void EmitThumbFunc(MCSymbol *Func);
-  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                unsigned ByteAlignment);
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolType(int Type) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EndCOFFSymbolDef() {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-     SD.setSize(Value);
-  }
-
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-
-  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
-                            unsigned Size = 0, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                              uint64_t Size, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
-                                    unsigned ValueSize = 1,
-                                    unsigned MaxBytesToEmit = 0);
-  virtual void EmitCodeAlignment(unsigned ByteAlignment,
-                                 unsigned MaxBytesToEmit = 0);
-
-  virtual void EmitFileDirective(StringRef Filename);
-
-  virtual void Finish();
-
-private:
-  virtual void EmitInstToFragment(const MCInst &Inst);
-  virtual void EmitInstToData(const MCInst &Inst);
-
-  void fixSymbolsInTLSFixups(const MCExpr *expr);
-
-  struct LocalCommon {
-    MCSymbolData *SD;
-    uint64_t Size;
-    unsigned ByteAlignment;
-  };
-  std::vector<LocalCommon> LocalCommons;
-
-  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
-  /// @}
-  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
-                  SectionKind Kind) {
-    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
-  }
-
-  void SetSectionData() {
-    SetSection(".data", ELF::SHT_PROGBITS,
-               ELF::SHF_WRITE |ELF::SHF_ALLOC,
-               SectionKind::getDataRel());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionText() {
-    SetSection(".text", ELF::SHT_PROGBITS,
-               ELF::SHF_EXECINSTR |
-               ELF::SHF_ALLOC, SectionKind::getText());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionBss() {
-    SetSection(".bss", ELF::SHT_NOBITS,
-               ELF::SHF_WRITE |
-               ELF::SHF_ALLOC, SectionKind::getBSS());
-    EmitCodeAlignment(4, 0);
-  }
-};
-
-} // end anonymous namespace.
-
 void MCELFStreamer::InitSections() {
   // This emulates the same behavior of GNU as. This makes it easier
   // to compare the output as the major sections are in the same order.
@@ -194,7 +48,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
     static_cast<const MCSectionELF&>(Symbol->getSection());
   MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
   if (Section.getFlags() & ELF::SHF_TLS)
-    SetType(SD, ELF::STT_TLS);
+    MCELF::SetType(SD, ELF::STT_TLS);
 }
 
 void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
@@ -281,54 +135,54 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     break;
 
   case MCSA_Global:
-    SetBinding(SD, ELF::STB_GLOBAL);
+    MCELF::SetBinding(SD, ELF::STB_GLOBAL);
     SD.setExternal(true);
     BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_WeakReference:
   case MCSA_Weak:
-    SetBinding(SD, ELF::STB_WEAK);
+    MCELF::SetBinding(SD, ELF::STB_WEAK);
     SD.setExternal(true);
     BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_Local:
-    SetBinding(SD, ELF::STB_LOCAL);
+    MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
     BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_ELF_TypeFunction:
-    SetType(SD, ELF::STT_FUNC);
+    MCELF::SetType(SD, ELF::STT_FUNC);
     break;
 
   case MCSA_ELF_TypeObject:
-    SetType(SD, ELF::STT_OBJECT);
+    MCELF::SetType(SD, ELF::STT_OBJECT);
     break;
 
   case MCSA_ELF_TypeTLS:
-    SetType(SD, ELF::STT_TLS);
+    MCELF::SetType(SD, ELF::STT_TLS);
     break;
 
   case MCSA_ELF_TypeCommon:
-    SetType(SD, ELF::STT_COMMON);
+    MCELF::SetType(SD, ELF::STT_COMMON);
     break;
 
   case MCSA_ELF_TypeNoType:
-    SetType(SD, ELF::STT_NOTYPE);
+    MCELF::SetType(SD, ELF::STT_NOTYPE);
     break;
 
   case MCSA_Protected:
-    SetVisibility(SD, ELF::STV_PROTECTED);
+    MCELF::SetVisibility(SD, ELF::STV_PROTECTED);
     break;
 
   case MCSA_Hidden:
-    SetVisibility(SD, ELF::STV_HIDDEN);
+    MCELF::SetVisibility(SD, ELF::STV_HIDDEN);
     break;
 
   case MCSA_Internal:
-    SetVisibility(SD, ELF::STV_INTERNAL);
+    MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
     break;
   }
 }
@@ -338,13 +192,13 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
 
   if (!BindingExplicitlySet.count(Symbol)) {
-    SetBinding(SD, ELF::STB_GLOBAL);
+    MCELF::SetBinding(SD, ELF::STB_GLOBAL);
     SD.setExternal(true);
   }
 
-  SetType(SD, ELF::STT_OBJECT);
+  MCELF::SetType(SD, ELF::STT_OBJECT);
 
-  if (GetBinding(SD) == ELF_STB_Local) {
+  if (MCELF::GetBinding(SD) == ELF_STB_Local) {
     const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
                                                                     ELF::SHT_NOBITS,
                                                                     ELF::SHF_WRITE |
@@ -364,7 +218,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
   // FIXME: Should this be caught and done earlier?
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-  SetBinding(SD, ELF::STB_LOCAL);
+  MCELF::SetBinding(SD, ELF::STB_LOCAL);
   SD.setExternal(false);
   BindingExplicitlySet.insert(Symbol);
   // FIXME: ByteAlignment is not needed here, but is required.
@@ -437,19 +291,22 @@ void  MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
     switch (symRef.getKind()) {
     default:
       return;
+    case MCSymbolRefExpr::VK_GOTTPOFF:
+    case MCSymbolRefExpr::VK_INDNTPOFF:
     case MCSymbolRefExpr::VK_NTPOFF:
     case MCSymbolRefExpr::VK_GOTNTPOFF:
     case MCSymbolRefExpr::VK_TLSGD:
+    case MCSymbolRefExpr::VK_TLSLD:
     case MCSymbolRefExpr::VK_TLSLDM:
     case MCSymbolRefExpr::VK_TPOFF:
     case MCSymbolRefExpr::VK_DTPOFF:
-    case MCSymbolRefExpr::VK_GOTTPOFF:
-    case MCSymbolRefExpr::VK_TLSLD:
     case MCSymbolRefExpr::VK_ARM_TLSGD:
+    case MCSymbolRefExpr::VK_ARM_TPOFF:
+    case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
       break;
     }
     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
-    SetType(SD, ELF::STT_TLS);
+    MCELF::SetType(SD, ELF::STT_TLS);
     break;
   }
 
@@ -489,7 +346,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
 
 void MCELFStreamer::Finish() {
   if (getNumFrameInfos())
-    MCDwarfFrameEmitter::Emit(*this);
+    MCDwarfFrameEmitter::Emit(*this, true);
 
   for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
                                                 e = LocalCommons.end();
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
new file mode 100644
index 0000000..db34d58
--- /dev/null
+++ b/lib/MC/MCELFStreamer.h
@@ -0,0 +1,274 @@
+//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+class MCELFStreamer : public MCObjectStreamer {
+public:
+  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                  raw_ostream &OS, MCCodeEmitter *Emitter)
+    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                raw_ostream &OS, MCCodeEmitter *Emitter,
+                MCAssembler *Assembler)
+    : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+
+  ~MCELFStreamer() {}
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void InitSections();
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment);
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitCOFFSymbolType(int Type) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EndCOFFSymbolDef() {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+     SD.setSize(Value);
+  }
+
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0);
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit = 0);
+
+  virtual void EmitFileDirective(StringRef Filename);
+
+  virtual void Finish();
+
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitInstToData(const MCInst &Inst);
+
+  void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+  struct LocalCommon {
+    MCSymbolData *SD;
+    uint64_t Size;
+    unsigned ByteAlignment;
+  };
+  std::vector<LocalCommon> LocalCommons;
+
+  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+  /// @}
+  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+                  SectionKind Kind) {
+    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+  }
+
+  void SetSectionData() {
+    SetSection(".data", ELF::SHT_PROGBITS,
+               ELF::SHF_WRITE |ELF::SHF_ALLOC,
+               SectionKind::getDataRel());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionText() {
+    SetSection(".text", ELF::SHT_PROGBITS,
+               ELF::SHF_EXECINSTR |
+               ELF::SHF_ALLOC, SectionKind::getText());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionBss() {
+    SetSection(".bss", ELF::SHT_NOBITS,
+               ELF::SHF_WRITE |
+               ELF::SHF_ALLOC, SectionKind::getBSS());
+    EmitCodeAlignment(4, 0);
+  }
+};
+
+} // end llvm namespace
+
+#endif
+//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+class MCELFStreamer : public MCObjectStreamer {
+public:
+  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                  raw_ostream &OS, MCCodeEmitter *Emitter)
+    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+  ~MCELFStreamer() {}
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void InitSections();
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment);
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitCOFFSymbolType(int Type) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EndCOFFSymbolDef() {
+    assert(0 && "ELF doesn't support this directive");
+  }
+
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+     SD.setSize(Value);
+  }
+
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0) {
+    assert(0 && "ELF doesn't support this directive");
+  }
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0);
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit = 0);
+
+  virtual void EmitFileDirective(StringRef Filename);
+
+  virtual void Finish();
+
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitInstToData(const MCInst &Inst);
+
+  void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+  struct LocalCommon {
+    MCSymbolData *SD;
+    uint64_t Size;
+    unsigned ByteAlignment;
+  };
+  std::vector<LocalCommon> LocalCommons;
+
+  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+  /// @}
+  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+                  SectionKind Kind) {
+    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+  }
+
+  void SetSectionData() {
+    SetSection(".data", ELF::SHT_PROGBITS,
+               ELF::SHF_WRITE |ELF::SHF_ALLOC,
+               SectionKind::getDataRel());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionText() {
+    SetSection(".text", ELF::SHT_PROGBITS,
+               ELF::SHF_EXECINSTR |
+               ELF::SHF_ALLOC, SectionKind::getText());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionBss() {
+    SetSection(".bss", ELF::SHT_NOBITS,
+               ELF::SHF_WRITE |
+               ELF::SHF_ALLOC, SectionKind::getBSS());
+    EmitCodeAlignment(4, 0);
+  }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 54d3743..3a674d7 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -310,6 +310,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
   if (AD.getFragment() == BD.getFragment()) {
     Addend += (AD.getOffset() - BD.getOffset());
 
+    // Pointers to Thumb symbols need to have their low-bit set to allow
+    // for interworking.
+    if (Asm->isThumbFunc(&SA))
+      Addend |= 1;
+
     // Clear the symbol expr pointers to indicate we have folded these
     // operands.
     A = B = 0;
@@ -384,7 +389,7 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
     //   (LHS_A - RHS_B),
     //   (RHS_A - LHS_B),
     //   (RHS_A - RHS_B).
-    // Since we are attempting to be as aggresive as possible about folding, we
+    // Since we are attempting to be as aggressive as possible about folding, we
     // attempt to evaluate each possible alternative.
     AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
                                         Result_Cst);
@@ -554,3 +559,45 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
   assert(0 && "Invalid assembly expression kind!");
   return false;
 }
+
+const MCSection *MCExpr::FindAssociatedSection() const {
+  switch (getKind()) {
+  case Target:
+    // We never look through target specific expressions.
+    return cast<MCTargetExpr>(this)->FindAssociatedSection();
+
+  case Constant:
+    return MCSymbol::AbsolutePseudoSection;
+
+  case SymbolRef: {
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
+    const MCSymbol &Sym = SRE->getSymbol();
+
+    if (Sym.isDefined())
+      return &Sym.getSection();
+
+    return 0;
+  }
+
+  case Unary:
+    return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+
+  case Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
+    const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
+    const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+
+    // If either section is absolute, return the other.
+    if (LHS_S == MCSymbol::AbsolutePseudoSection)
+      return RHS_S;
+    if (RHS_S == MCSymbol::AbsolutePseudoSection)
+      return LHS_S;
+
+    // Otherwise, return the first non-null section.
+    return LHS_S ? LHS_S : RHS_S;
+  }
+  }
+
+  assert(0 && "Invalid assembly expression kind!");
+  return 0;
+}
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 92a7154..212b85e 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -19,3 +19,8 @@ MCInstPrinter::~MCInstPrinter() {
 StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
   return "";
 }
+
+StringRef MCInstPrinter::getRegName(unsigned RegNo) const {
+  assert(0 && "Target should implement this");
+  return "";
+}
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index 012c7f6..46ea9b8 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -154,21 +154,19 @@ public:
   }
 
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             bool isPCRel, unsigned AddrSpace){
+                             unsigned AddrSpace){
     LogCall("EmitValue");
-    return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace);
+    return Child->EmitValueImpl(Value, Size, AddrSpace);
   }
 
-  virtual void EmitULEB128Value(const MCExpr *Value,
-                                unsigned AddrSpace = 0) {
+  virtual void EmitULEB128Value(const MCExpr *Value) {
     LogCall("EmitULEB128Value");
-    return Child->EmitULEB128Value(Value, AddrSpace);
+    return Child->EmitULEB128Value(Value);
   }
 
-  virtual void EmitSLEB128Value(const MCExpr *Value,
-                                unsigned AddrSpace = 0) {
+  virtual void EmitSLEB128Value(const MCExpr *Value) {
     LogCall("EmitSLEB128Value");
-    return Child->EmitSLEB128Value(Value, AddrSpace);
+    return Child->EmitSLEB128Value(Value);
   }
 
   virtual void EmitGPRel32Value(const MCExpr *Value) {
@@ -215,13 +213,14 @@ public:
 
   virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                      unsigned Column, unsigned Flags,
-                                     unsigned Isa, unsigned Discriminator) {
+                                     unsigned Isa, unsigned Discriminator,
+                                     StringRef FileName) {
     LogCall("EmitDwarfLocDirective",
             "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
             " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
             " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
             return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
-                                                Isa, Discriminator);
+                                                Isa, Discriminator, FileName);
   }
 
   virtual void EmitInstruction(const MCInst &Inst) {
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index d1f9f5c..3da5b49 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -44,6 +44,8 @@ public:
 
   virtual void InitSections();
   virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
+                                   MCSymbol *EHSymbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
   virtual void EmitThumbFunc(MCSymbol *Func);
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
@@ -101,6 +103,18 @@ void MCMachOStreamer::InitSections() {
 
 }
 
+void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+                                          MCSymbol *EHSymbol) {
+  MCSymbolData &SD =
+    getAssembler().getOrCreateSymbolData(*Symbol);
+  if (SD.isExternal())
+    EmitSymbolAttribute(EHSymbol, MCSA_Global);
+  if (SD.getFlags() & SF_WeakDefinition)
+    EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
+  if (SD.isPrivateExtern())
+    EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
+}
+
 void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
@@ -363,6 +377,9 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
 }
 
 void MCMachOStreamer::Finish() {
+  if (getNumFrameInfos())
+    MCDwarfFrameEmitter::Emit(*this, true);
+
   // We have to set the fragment atom associations so we can relax properly for
   // Mach-O.
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 08ddf01..f38b822 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -67,11 +67,9 @@ namespace {
     virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
 
     virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                               bool isPCRel, unsigned AddrSpace) {}
-    virtual void EmitULEB128Value(const MCExpr *Value,
-                                  unsigned AddrSpace = 0) {}
-    virtual void EmitSLEB128Value(const MCExpr *Value,
-                                  unsigned AddrSpace = 0) {}
+                               unsigned AddrSpace) {}
+    virtual void EmitULEB128Value(const MCExpr *Value) {}
+    virtual void EmitSLEB128Value(const MCExpr *Value) {}
     virtual void EmitGPRel32Value(const MCExpr *Value) {}
     virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                       unsigned ValueSize = 1,
@@ -89,7 +87,8 @@ namespace {
     }
     virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                        unsigned Column, unsigned Flags,
-                                       unsigned Isa, unsigned Discriminator) {}
+                                       unsigned Isa, unsigned Discriminator,
+                                       StringRef FileName) {}
     virtual void EmitInstruction(const MCInst &Inst) {}
 
     virtual void Finish() {}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index e67d9b0..0f349d0 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -31,6 +31,13 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
 {
 }
 
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                                   raw_ostream &OS, MCCodeEmitter *Emitter_,
+                                   MCAssembler *_Assembler)
+  : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0)
+{
+}
+
 MCObjectStreamer::~MCObjectStreamer() {
   delete &Assembler->getBackend();
   delete &Assembler->getEmitter();
@@ -83,7 +90,7 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
 }
 
 void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                     bool isPCRel, unsigned AddrSpace) {
+                                     unsigned AddrSpace) {
   assert(AddrSpace == 0 && "Address space must be 0!");
   MCDataFragment *DF = getOrCreateDataFragment();
 
@@ -95,15 +102,12 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
   }
   DF->addFixup(MCFixup::Create(DF->getContents().size(),
                                Value,
-                               MCFixup::getKindForSize(Size, isPCRel)));
+                               MCFixup::getKindForSize(Size, false)));
   DF->getContents().resize(DF->getContents().size() + Size, 0);
 }
 
 void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
-  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-
-  Symbol->setSection(*getCurrentSection());
+  MCStreamer::EmitLabel(Symbol);
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
 
@@ -117,23 +121,23 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
   SD.setOffset(F->getContents().size());
 }
 
-void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value,
-                                        unsigned AddrSpace) {
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
   int64_t IntValue;
   if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
-    EmitULEB128IntValue(IntValue, AddrSpace);
+    EmitULEB128IntValue(IntValue);
     return;
   }
+  Value = ForceExpAbs(this, getContext(), Value);
   new MCLEBFragment(*Value, false, getCurrentSectionData());
 }
 
-void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value,
-                                        unsigned AddrSpace) {
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
   int64_t IntValue;
   if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
-    EmitSLEB128IntValue(IntValue, AddrSpace);
+    EmitSLEB128IntValue(IntValue);
     return;
   }
+  Value = ForceExpAbs(this, getContext(), Value);
   new MCLEBFragment(*Value, true, getCurrentSectionData());
 }
 
@@ -184,30 +188,11 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
 void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
   MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
 
-  raw_svector_ostream VecOS(IF->getCode());
+  SmallString<128> Code;
+  raw_svector_ostream VecOS(Code);
   getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
-}
-
-static const MCExpr *BuildSymbolDiff(MCContext &Context,
-                                     const MCSymbol *A, const MCSymbol *B) {
-  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-  const MCExpr *ARef =
-    MCSymbolRefExpr::Create(A, Variant, Context);
-  const MCExpr *BRef =
-    MCSymbolRefExpr::Create(B, Variant, Context);
-  const MCExpr *AddrDelta =
-    MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
-  return AddrDelta;
-}
-
-static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer,
-                                  MCContext &Context, const MCExpr* Expr) {
- if (Context.getAsmInfo().hasAggressiveSymbolFolding())
-   return Expr;
-
- MCSymbol *ABS = Context.CreateTempSymbol();
- Streamer->EmitAssignment(ABS, Expr);
- return MCSymbolRefExpr::Create(ABS, Context);
+  VecOS.flush();
+  IF->getCode().append(Code.begin(), Code.end());
 }
 
 void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 89374d0..a3d3a49 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -213,13 +213,13 @@ AsmToken AsmLexer::LexDigit() {
 
     // Requires at least one binary digit.
     if (CurPtr == NumStart)
-      return ReturnError(TokStart, "Invalid binary number");
+      return ReturnError(TokStart, "invalid binary number");
 
     StringRef Result(TokStart, CurPtr - TokStart);
 
     long long Value;
     if (Result.substr(2).getAsInteger(2, Value))
-      return ReturnError(TokStart, "Invalid binary number");
+      return ReturnError(TokStart, "invalid binary number");
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
     // suffixes on integer literals.
@@ -236,11 +236,11 @@ AsmToken AsmLexer::LexDigit() {
 
     // Requires at least one hex digit.
     if (CurPtr == NumStart)
-      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+      return ReturnError(CurPtr-2, "invalid hexadecimal number");
 
     unsigned long long Result;
     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
-      return ReturnError(TokStart, "Invalid hexadecimal number");
+      return ReturnError(TokStart, "invalid hexadecimal number");
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
     // suffixes on integer literals.
@@ -251,13 +251,13 @@ AsmToken AsmLexer::LexDigit() {
   }
 
   // Must be an octal number, it starts with 0.
-  while (*CurPtr >= '0' && *CurPtr <= '7')
+  while (*CurPtr >= '0' && *CurPtr <= '9')
     ++CurPtr;
 
   StringRef Result(TokStart, CurPtr - TokStart);
   long long Value;
   if (Result.getAsInteger(8, Value))
-    return ReturnError(TokStart, "Invalid octal number");
+    return ReturnError(TokStart, "invalid octal number");
 
   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
   // suffixes on integer literals.
@@ -324,8 +324,8 @@ AsmToken AsmLexer::LexQuote() {
 StringRef AsmLexer::LexUntilEndOfStatement() {
   TokStart = CurPtr;
 
-  while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
-          *CurPtr != ';' &&  // End of statement marker.
+  while (!isAtStartOfComment(*CurPtr) &&    // Start of line comment.
+         !isAtStatementSeparator(CurPtr) && // End of statement marker.
          *CurPtr != '\n' &&
          *CurPtr != '\r' &&
          (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
@@ -339,6 +339,11 @@ bool AsmLexer::isAtStartOfComment(char Char) {
   return Char == *MAI.getCommentString();
 }
 
+bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
+  return strncmp(Ptr, MAI.getSeparatorString(),
+                 strlen(MAI.getSeparatorString())) == 0;
+}
+
 AsmToken AsmLexer::LexToken() {
   TokStart = CurPtr;
   // This always consumes at least one character.
@@ -346,6 +351,11 @@ AsmToken AsmLexer::LexToken() {
 
   if (isAtStartOfComment(CurChar))
     return LexLineComment();
+  if (isAtStatementSeparator(TokStart)) {
+    CurPtr += strlen(MAI.getSeparatorString()) - 1;
+    return AsmToken(AsmToken::EndOfStatement,
+                    StringRef(TokStart, strlen(MAI.getSeparatorString())));
+  }
 
   switch (CurChar) {
   default:
@@ -362,8 +372,8 @@ AsmToken AsmLexer::LexToken() {
     // Ignore whitespace.
     return LexToken();
   case '\n': // FALL THROUGH.
-  case '\r': // FALL THROUGH.
-  case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+  case '\r':
+    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
   case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index a84917f..d8fd27d 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -251,10 +251,14 @@ public:
                                                          ".cfi_def_cfa");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
                                                          ".cfi_def_cfa_offset");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>(
+                                                      ".cfi_adjust_cfa_offset");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
                                                        ".cfi_def_cfa_register");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
                                                                  ".cfi_offset");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>(
+                                                             ".cfi_rel_offset");
     AddDirectiveHandler<
      &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
     AddDirectiveHandler<
@@ -263,6 +267,8 @@ public:
       &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
     AddDirectiveHandler<
       &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
 
     // Macro directives.
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -287,11 +293,14 @@ public:
   bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
 
   bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
@@ -517,6 +526,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   switch (Lexer.getKind()) {
   default:
     return TokError("unknown token in expression");
+  // If we have an error assume that we've already handled it.
+  case AsmToken::Error:
+    return true;
   case AsmToken::Exclaim:
     Lex(); // Eat the operator.
     if (ParsePrimaryExpr(Res, EndLoc))
@@ -880,6 +892,7 @@ bool AsmParser::ParseStatement() {
     EatToEndOfStatement();
     return false;
   }
+
   // Allow an integer followed by a ':' as a directional local label.
   if (Lexer.is(AsmToken::Integer)) {
     LocalLabelVal = getTok().getIntVal();
@@ -896,13 +909,19 @@ bool AsmParser::ParseStatement() {
           return TokError("unexpected token at start of statement");
       }
     }
-  }
-  else if (ParseIdentifier(IDVal)) {
+
+  } else if (Lexer.is(AsmToken::Dot)) {
+    // Treat '.' as a valid identifier in this context.
+    Lex();
+    IDVal = ".";
+
+  } else if (ParseIdentifier(IDVal)) {
     if (!TheCondState.Ignore)
       return TokError("unexpected token at start of statement");
     IDVal = "";
   }
 
+
   // Handle conditional assembly here before checking for skipping.  We
   // have to do this so that .endif isn't skipped in a ".if 0" block for
   // example.
@@ -935,6 +954,10 @@ bool AsmParser::ParseStatement() {
     // identifier ':'   -> Label.
     Lex();
 
+    // Diagnose attempt to use '.' as a label.
+    if (IDVal == ".")
+      return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
+
     // Diagnose attempt to use a variable as a label.
     //
     // FIXME: Diagnostics. Note the location of the definition as a label.
@@ -978,7 +1001,7 @@ bool AsmParser::ParseStatement() {
       return HandleMacroEntry(IDVal, IDLoc, M);
 
   // Otherwise, we have a normal instruction or directive.
-  if (IDVal[0] == '.') {
+  if (IDVal[0] == '.' && IDVal != ".") {
     // Assembler features
     if (IDVal == ".set" || IDVal == ".equ")
       return ParseDirectiveSet(IDVal, true);
@@ -1041,7 +1064,7 @@ bool AsmParser::ParseStatement() {
 
     if (IDVal == ".fill")
       return ParseDirectiveFill();
-    if (IDVal == ".space")
+    if (IDVal == ".space" || IDVal == ".skip")
       return ParseDirectiveSpace();
     if (IDVal == ".zero")
       return ParseDirectiveZero();
@@ -1306,6 +1329,12 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
   if (Lexer.isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in assignment");
 
+  // Error on assignment to '.'.
+  if (Name == ".") {
+    return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported "
+                            "(use '.space' or '.org').)"));
+  }
+
   // Eat the end of statement marker.
   Lex();
 
@@ -1319,7 +1348,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
     // FIXME: Diagnose assignment to protected identifier (e.g., register name).
     if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
       ; // Allow redefinitions of undefined symbols only used in directives.
-    else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef))
+    else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
       return Error(EqualLoc, "redefinition of '" + Name + "'");
     else if (!Sym->isVariable())
       return Error(EqualLoc, "invalid assignment to '" + Name + "'");
@@ -1535,13 +1564,21 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
         Lex();
 
       if (getLexer().isNot(AsmToken::Integer) &&
-          getLexer().isNot(AsmToken::Real))
+          getLexer().isNot(AsmToken::Real) &&
+          getLexer().isNot(AsmToken::Identifier))
         return TokError("unexpected token in directive");
 
       // Convert to an APFloat.
       APFloat Value(Semantics);
-      if (Value.convertFromString(getTok().getString(),
-                                  APFloat::rmNearestTiesToEven) ==
+      StringRef IDVal = getTok().getString();
+      if (getLexer().is(AsmToken::Identifier)) {
+        if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
+          Value = APFloat::getInf(Semantics);
+        else if (!IDVal.compare_lower("nan"))
+          Value = APFloat::getNaN(Semantics, false, ~0);
+        else
+          return TokError("invalid floating point literal");
+      } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
           APFloat::opInvalidOp)
         return TokError("invalid floating point literal");
       if (IsNeg)
@@ -2216,7 +2253,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
   }
 
   getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
-                                      Isa, Discriminator);
+                                      Isa, Discriminator, StringRef());
 
   return false;
 }
@@ -2232,13 +2269,15 @@ bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
 /// ::= .cfi_startproc
 bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
                                                   SMLoc DirectiveLoc) {
-  return getStreamer().EmitCFIStartProc();
+  getStreamer().EmitCFIStartProc();
+  return false;
 }
 
 /// ParseDirectiveCFIEndProc
 /// ::= .cfi_endproc
 bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
-  return getStreamer().EmitCFIEndProc();
+  getStreamer().EmitCFIEndProc();
+  return false;
 }
 
 /// ParseRegisterOrRegisterNumber - parse register name or number.
@@ -2273,7 +2312,8 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
   if (getParser().ParseAbsoluteExpression(Offset))
     return true;
 
-  return getStreamer().EmitCFIDefCfa(Register, Offset);
+  getStreamer().EmitCFIDefCfa(Register, Offset);
+  return false;
 }
 
 /// ParseDirectiveCFIDefCfaOffset
@@ -2284,7 +2324,20 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
   if (getParser().ParseAbsoluteExpression(Offset))
     return true;
 
-  return getStreamer().EmitCFIDefCfaOffset(Offset);
+  getStreamer().EmitCFIDefCfaOffset(Offset);
+  return false;
+}
+
+/// ParseDirectiveCFIAdjustCfaOffset
+/// ::= .cfi_adjust_cfa_offset adjustment
+bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef,
+                                                        SMLoc DirectiveLoc) {
+  int64_t Adjustment = 0;
+  if (getParser().ParseAbsoluteExpression(Adjustment))
+    return true;
+
+  getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
+  return false;
 }
 
 /// ParseDirectiveCFIDefCfaRegister
@@ -2295,11 +2348,12 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
   if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
     return true;
 
-  return getStreamer().EmitCFIDefCfaRegister(Register);
+  getStreamer().EmitCFIDefCfaRegister(Register);
+  return false;
 }
 
 /// ParseDirectiveCFIOffset
-/// ::= .cfi_off register, offset
+/// ::= .cfi_offset register, offset
 bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
   int64_t Register = 0;
   int64_t Offset = 0;
@@ -2314,7 +2368,29 @@ bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
   if (getParser().ParseAbsoluteExpression(Offset))
     return true;
 
-  return getStreamer().EmitCFIOffset(Register, Offset);
+  getStreamer().EmitCFIOffset(Register, Offset);
+  return false;
+}
+
+/// ParseDirectiveCFIRelOffset
+/// ::= .cfi_rel_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef,
+                                                  SMLoc DirectiveLoc) {
+  int64_t Register = 0;
+
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Offset = 0;
+  if (getParser().ParseAbsoluteExpression(Offset))
+    return true;
+
+  getStreamer().EmitCFIRelOffset(Register, Offset);
+  return false;
 }
 
 static bool isValidEncoding(int64_t Encoding) {
@@ -2364,25 +2440,42 @@ bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
   MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
 
   if (IDVal == ".cfi_personality")
-    return getStreamer().EmitCFIPersonality(Sym, Encoding);
+    getStreamer().EmitCFIPersonality(Sym, Encoding);
   else {
     assert(IDVal == ".cfi_lsda");
-    return getStreamer().EmitCFILsda(Sym, Encoding);
+    getStreamer().EmitCFILsda(Sym, Encoding);
   }
+  return false;
 }
 
 /// ParseDirectiveCFIRememberState
 /// ::= .cfi_remember_state
 bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
                                                       SMLoc DirectiveLoc) {
-  return getStreamer().EmitCFIRememberState();
+  getStreamer().EmitCFIRememberState();
+  return false;
 }
 
 /// ParseDirectiveCFIRestoreState
 /// ::= .cfi_remember_state
 bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
                                                      SMLoc DirectiveLoc) {
-  return getStreamer().EmitCFIRestoreState();
+  getStreamer().EmitCFIRestoreState();
+  return false;
+}
+
+/// ParseDirectiveCFISameValue
+/// ::= .cfi_same_value register
+bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
+                                                  SMLoc DirectiveLoc) {
+  int64_t Register = 0;
+
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;
+
+  getStreamer().EmitCFISameValue(Register);
+
+  return false;
 }
 
 /// ParseDirectiveMacrosOnOff
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 44f2345..3c092cd 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -100,6 +100,8 @@ public:
     AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
     AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
     AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
+
+    AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
   }
 
   bool ParseDirectiveDesc(StringRef, SMLoc);
@@ -277,6 +279,11 @@ public:
     return ParseSectionSwitch("__DATA", "__thread_vars",
                               MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
   }
+  bool ParseSectionDirectiveIdent(StringRef, SMLoc) {
+    // Darwin silently ignores the .ident directive.
+    getParser().EatToEndOfStatement();
+    return false;
+  }
   bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
     return ParseSectionSwitch("__DATA", "__thread_init",
                          MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
@@ -427,10 +434,12 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
 
 
   StringRef Segment, Section;
-  unsigned TAA, StubSize;
+  unsigned StubSize;
+  unsigned TAA;
+  bool TAAParsed;
   std::string ErrorStr =
     MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
-                                          TAA, StubSize);
+                                          TAA, TAAParsed, StubSize);
 
   if (!ErrorStr.empty())
     return Error(Loc, ErrorStr.c_str());
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index d32aea1..dfd77c3 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -39,8 +39,28 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
     return;
   }
 
-  OS << "\t.section\t" << getSectionName();
-  
+  StringRef name = getSectionName();
+  if (name.find_first_not_of("0123456789_."
+                             "abcdefghijklmnopqrstuvwxyz"
+                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) {
+    OS << "\t.section\t" << name;
+  } else {
+    OS << "\t.section\t\"";
+    for (const char *b = name.begin(), *e = name.end(); b < e; ++b) {
+      if (*b == '"') // Unquoted "
+        OS << "\\\"";
+      else if (*b != '\\') // Neither " or backslash
+        OS << *b;
+      else if (b + 1 == e) // Trailing backslash
+        OS << "\\\\";
+      else {
+        OS << b[0] << b[1]; // Quoted character
+        ++b;
+      }
+    }
+    OS << '"';
+  }
+
   // Handle the weird solaris syntax if desired.
   if (MAI.usesSunStyleELFSectionSwitchSyntax() && 
       !(Flags & ELF::SHF_MERGE)) {
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 577e93a..e771556 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -180,7 +180,9 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
                                                   StringRef &Segment,    // Out.
                                                   StringRef &Section,    // Out.
                                                   unsigned  &TAA,        // Out.
+                                                  bool      &TAAParsed,  // Out.
                                                   unsigned  &StubSize) { // Out.
+  TAAParsed = false;
   // Find the first comma.
   std::pair<StringRef, StringRef> Comma = Spec.split(',');
 
@@ -211,6 +213,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
            "between 1 and 16 characters";
 
   // If there is no comma after the section, we're done.
+  TAA = 0;
   StubSize = 0;
   if (Comma.second.empty())
     return "";
@@ -235,6 +238,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
 
   // Remember the TypeID.
   TAA = TypeID;
+  TAAParsed = true;
 
   // If we have no comma after the section type, there are no attributes.
   if (Comma.second.empty()) {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 4b302c8..fa245b1 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallString.h"
@@ -27,6 +28,29 @@ MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) {
 MCStreamer::~MCStreamer() {
 }
 
+const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
+                                          const MCSymbol *A,
+                                          const MCSymbol *B) {
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  const MCExpr *ARef =
+    MCSymbolRefExpr::Create(A, Variant, Context);
+  const MCExpr *BRef =
+    MCSymbolRefExpr::Create(B, Variant, Context);
+  const MCExpr *AddrDelta =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+  return AddrDelta;
+}
+
+const MCExpr *MCStreamer::ForceExpAbs(MCStreamer *Streamer,
+                                      MCContext &Context, const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding())
+   return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ Streamer->EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
 raw_ostream &MCStreamer::GetCommentOS() {
   // By default, discard comments.
   return nulls();
@@ -90,30 +114,15 @@ void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
 
 void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
                            unsigned AddrSpace) {
-  EmitValueImpl(Value, Size, false, AddrSpace);
-}
-
-void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size,
-                                unsigned AddrSpace) {
-  EmitValueImpl(Value, Size, true, AddrSpace);
+  EmitValueImpl(Value, Size, AddrSpace);
 }
 
 void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
-                                 bool isPCRel, unsigned AddrSpace) {
-  EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel,
+                                  unsigned AddrSpace) {
+  EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size,
                 AddrSpace);
 }
 
-void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
-                                 unsigned AddrSpace) {
-  EmitSymbolValue(Sym, Size, false, AddrSpace);
-}
-
-void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
-                                      unsigned AddrSpace) {
-  EmitSymbolValue(Sym, Size, true, AddrSpace);
-}
-
 void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
   report_fatal_error("unsupported directive in streamer");
 }
@@ -135,7 +144,8 @@ bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
 void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
                                        unsigned Column, unsigned Flags,
                                        unsigned Isa,
-                                       unsigned Discriminator) {
+                                       unsigned Discriminator,
+                                       StringRef FileName) {
   getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
                                   Discriminator);
 }
@@ -152,28 +162,39 @@ void MCStreamer::EnsureValidFrame() {
     report_fatal_error("No open frame");
 }
 
-bool MCStreamer::EmitCFIStartProc() {
+void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
+                                     MCSymbol *EHSymbol) {
+}
+
+void MCStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
+  Symbol->setSection(*getCurrentSection());
+
+  StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
+  if (!Symbol->getName().startswith(Prefix))
+    LastNonPrivate = Symbol;
+}
+
+void MCStreamer::EmitCFIStartProc() {
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
-  if (CurFrame && !CurFrame->End) {
+  if (CurFrame && !CurFrame->End)
     report_fatal_error("Starting a frame before finishing the previous one!");
-    return true;
-  }
   MCDwarfFrameInfo Frame;
   Frame.Begin = getContext().CreateTempSymbol();
+  Frame.Function = LastNonPrivate;
   EmitLabel(Frame.Begin);
   FrameInfos.push_back(Frame);
-  return false;
 }
 
-bool MCStreamer::EmitCFIEndProc() {
+void MCStreamer::EmitCFIEndProc() {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   CurFrame->End = getContext().CreateTempSymbol();
   EmitLabel(CurFrame->End);
-  return false;
 }
 
-bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   MCSymbol *Label = getContext().CreateTempSymbol();
@@ -182,10 +203,9 @@ bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
   MachineLocation Source(Register, -Offset);
   MCCFIInstruction Instruction(Label, Dest, Source);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
 }
 
-bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   MCSymbol *Label = getContext().CreateTempSymbol();
@@ -194,10 +214,20 @@ bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
   MachineLocation Source(MachineLocation::VirtualFP, -Offset);
   MCCFIInstruction Instruction(Label, Dest, Source);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
 }
 
-bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(MachineLocation::VirtualFP);
+  MachineLocation Source(MachineLocation::VirtualFP, Adjustment);
+  MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   MCSymbol *Label = getContext().CreateTempSymbol();
@@ -206,10 +236,9 @@ bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
   MachineLocation Source(MachineLocation::VirtualFP);
   MCCFIInstruction Instruction(Label, Dest, Source);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
 }
 
-bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   MCSymbol *Label = getContext().CreateTempSymbol();
@@ -218,37 +247,44 @@ bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
   MachineLocation Source(Register, Offset);
   MCCFIInstruction Instruction(Label, Dest, Source);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
 }
 
-bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(Register, Offset);
+  MachineLocation Source(Register, Offset);
+  MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
                                     unsigned Encoding) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   CurFrame->Personality = Sym;
   CurFrame->PersonalityEncoding = Encoding;
-  return false;
 }
 
-bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   CurFrame->Lsda = Sym;
   CurFrame->LsdaEncoding = Encoding;
-  return false;
 }
 
-bool MCStreamer::EmitCFIRememberState() {
+void MCStreamer::EmitCFIRememberState() {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
   MCSymbol *Label = getContext().CreateTempSymbol();
   EmitLabel(Label);
   MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
 }
 
-bool MCStreamer::EmitCFIRestoreState() {
+void MCStreamer::EmitCFIRestoreState() {
   // FIXME: Error if there is no matching cfi_remember_state.
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
@@ -256,7 +292,55 @@ bool MCStreamer::EmitCFIRestoreState() {
   EmitLabel(Label);
   MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
   CurFrame->Instructions.push_back(Instruction);
-  return false;
+}
+
+void MCStreamer::EmitCFISameValue(int64_t Register) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register);
+  CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitFnStart() {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitFnEnd() {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitCantUnwind() {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitHandlerData() {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitPersonality(const MCSymbol *Personality) {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitPad(int64_t Offset) {
+  errs() << "Not implemented yet\n";
+  abort();
+}
+
+void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
+  errs() << "Not implemented yet\n";
+  abort();
 }
 
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 1c71f26..c2fad16 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -58,9 +58,13 @@ void MCSymbol::setVariableValue(const MCExpr *Value) {
          "Invalid redefinition!");
   this->Value = Value;
 
-  // Mark the variable as absolute as appropriate.
-  if (isa<MCConstantExpr>(Value))
-    setAbsolute();
+  // Variables should always be marked as in the same "section" as the value.
+  const MCSection *Section = Value->FindAssociatedSection();
+  if (Section) {
+    setSection(*Section);
+  } else {
+    setUndefined();
+  }
 }
 
 void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 8af07c7..f049b1c 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -121,6 +121,33 @@ private:
   }
   uint64_t getSymbolAddress(const MCSymbolData* SD,
                             const MCAsmLayout &Layout) const {
+    const MCSymbol &S = SD->getSymbol();
+
+    // If this is a variable, then recursively evaluate now.
+    if (S.isVariable()) {
+      MCValue Target;
+      if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+        report_fatal_error("unable to evaluate offset for variable '" +
+                           S.getName() + "'");
+
+      // Verify that any used symbols are defined.
+      if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+        report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                           Target.getSymA()->getSymbol().getName() + "'");
+      if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+        report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                           Target.getSymB()->getSymbol().getName() + "'");
+
+      uint64_t Address = Target.getConstant();
+      if (Target.getSymA())
+        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                      Target.getSymA()->getSymbol()), Layout);
+      if (Target.getSymB())
+        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                      Target.getSymB()->getSymbol()), Layout);
+      return Address;
+    }
+
     return getSectionAddress(SD->getFragment()->getParent()) +
       Layout.getSymbolOffset(SD);
   }
@@ -274,8 +301,8 @@ public:
     if (is64Bit())
       Write32(0); // reserved3
 
-    assert(OS.tell() - Start == is64Bit() ? macho::Section64Size :
-           macho::Section32Size);
+    assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+           macho::Section32Size));
   }
 
   void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
@@ -440,7 +467,7 @@ public:
       // Compensate for the relocation offset, Darwin x86_64 relocations only
       // have the addend and appear to have attempted to define it to be the
       // actual expression addend without the PCrel bias. However, instructions
-      // with data following the relocation are not accomodated for (see comment
+      // with data following the relocation are not accommodated for (see comment
       // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
       Value += 1LL << Log2Size;
     }
@@ -541,7 +568,7 @@ public:
       }
 
       // x86_64 almost always uses external relocations, except when there is no
-      // symbol to use as a base address (a local symbol with no preceeding
+      // symbol to use as a base address (a local symbol with no preceding
       // non-local symbol).
       if (Base) {
         Index = Base->getIndex();
@@ -550,7 +577,7 @@ public:
         // Add the local offset, if needed.
         if (Base != &SD)
           Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
-      } else if (Symbol->isInSection()) {
+      } else if (Symbol->isInSection() && !Symbol->isVariable()) {
         // The index is the section ordinal (1-based).
         Index = SD.getFragment()->getParent()->getOrdinal() + 1;
         IsExtern = 0;
@@ -821,12 +848,12 @@ public:
     //  1 - :upper16: for movt instructions
     // high bit of r_length:
     //  0 - arm instructions
-    //  1 - thumb instructions   
+    //  1 - thumb instructions
     // the other half of the relocated expression is in the following pair
     // relocation entry in the the low 16 bits of r_address field.
     unsigned ThumbBit = 0;
     unsigned MovtBit = 0;
-    switch (Fixup.getKind()) {
+    switch ((unsigned)Fixup.getKind()) {
     default: break;
     case ARM::fixup_arm_movt_hi16:
     case ARM::fixup_arm_movt_hi16_pcrel:
@@ -952,15 +979,10 @@ public:
       RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
       Log2Size = llvm::Log2_32(2);
       return true;
-
+      
     case ARM::fixup_arm_thumb_bl:
-      RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit);
-      Log2Size = llvm::Log2_32(4);
-      return true;
-
     case ARM::fixup_arm_thumb_blx:
       RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
-      // Report as 'long', even though that is not quite accurate.
       Log2Size = llvm::Log2_32(4);
       return true;
 
@@ -1033,17 +1055,17 @@ public:
       // FIXME!
       report_fatal_error("FIXME: relocations to absolute targets "
                          "not yet implemented");
-    } else if (SD->getSymbol().isVariable()) {
-      int64_t Res;
-      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-            Res, Layout, SectionAddress)) {
-        FixedValue = Res;
-        return;
+    } else {
+      // Resolve constant variables.
+      if (SD->getSymbol().isVariable()) {
+        int64_t Res;
+        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+              Res, Layout, SectionAddress)) {
+          FixedValue = Res;
+          return;
+        }
       }
 
-      report_fatal_error("unsupported relocation of variable '" +
-                         SD->getSymbol().getName() + "'");
-    } else {
       // Check whether we need an external or internal relocation.
       if (doesSymbolRequireExternRelocation(SD)) {
         IsExtern = 1;
@@ -1055,8 +1077,10 @@ public:
           FixedValue -= Layout.getSymbolOffset(SD);
       } else {
         // The index is the section ordinal (1-based).
-        Index = SD->getFragment()->getParent()->getOrdinal() + 1;
-        FixedValue += getSectionAddress(SD->getFragment()->getParent());
+        const MCSectionData &SymSD = Asm.getSectionData(
+          SD->getSymbol().getSection());
+        Index = SymSD.getOrdinal() + 1;
+        FixedValue += getSectionAddress(&SymSD);
       }
       if (IsPCRel)
         FixedValue -= getSectionAddress(Fragment->getParent());
@@ -1132,17 +1156,17 @@ public:
       // FIXME: Currently, these are never generated (see code below). I cannot
       // find a case where they are actually emitted.
       Type = macho::RIT_Vanilla;
-    } else if (SD->getSymbol().isVariable()) {
-      int64_t Res;
-      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-            Res, Layout, SectionAddress)) {
-        FixedValue = Res;
-        return;
+    } else {
+      // Resolve constant variables.
+      if (SD->getSymbol().isVariable()) {
+        int64_t Res;
+        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+              Res, Layout, SectionAddress)) {
+          FixedValue = Res;
+          return;
+        }
       }
 
-      report_fatal_error("unsupported relocation of variable '" +
-                         SD->getSymbol().getName() + "'");
-    } else {
       // Check whether we need an external or internal relocation.
       if (doesSymbolRequireExternRelocation(SD)) {
         IsExtern = 1;
@@ -1154,8 +1178,10 @@ public:
           FixedValue -= Layout.getSymbolOffset(SD);
       } else {
         // The index is the section ordinal (1-based).
-        Index = SD->getFragment()->getParent()->getOrdinal() + 1;
-        FixedValue += getSectionAddress(SD->getFragment()->getParent());
+        const MCSectionData &SymSD = Asm.getSectionData(
+          SD->getSymbol().getSection());
+        Index = SymSD.getOrdinal() + 1;
+        FixedValue += getSectionAddress(&SymSD);
       }
       if (IsPCRel)
         FixedValue -= getSectionAddress(Fragment->getParent());
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 6ca5d37..101237a 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -647,22 +647,27 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
 
   COFFSection *coff_section = SectionMap[&SectionData->getSection()];
   COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+  const MCSymbolRefExpr *SymA = Target.getSymA();
+  const MCSymbolRefExpr *SymB = Target.getSymB();
+  const bool CrossSection = SymB &&
+    &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection();
 
   if (Target.getSymB()) {
-    if (&Target.getSymA()->getSymbol().getSection()
-     != &Target.getSymB()->getSymbol().getSection()) {
-      llvm_unreachable("Symbol relative relocations are only allowed between "
-                       "symbols in the same section");
-    }
     const MCSymbol *B = &Target.getSymB()->getSymbol();
     MCSymbolData &B_SD = Asm.getSymbolData(*B);
 
-    FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD);
+    // Offset of the symbol in the section
+    int64_t a = Layout.getSymbolOffset(&B_SD);
+
+    // Ofeset of the relocation in the section
+    int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
 
+    FixedValue = b - a;
     // In the case where we have SymbA and SymB, we just need to store the delta
     // between the two symbols.  Update FixedValue to account for the delta, and
     // skip recording the relocation.
-    return;
+    if (!CrossSection)
+      return;
   } else {
     FixedValue = Target.getConstant();
   }
@@ -673,7 +678,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
 
   // Turn relocations for temporary symbols into section relocations.
-  if (coff_symbol->MCData->getSymbol().isTemporary()) {
+  if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
     Reloc.Symb = coff_symbol->Section->Symbol;
     FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
                 + coff_symbol->MCData->getOffset();
@@ -684,7 +689,12 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
 
   Reloc.Data.VirtualAddress += Fixup.getOffset();
 
-  switch ((unsigned)Fixup.getKind()) {
+  unsigned FixupKind = Fixup.getKind();
+
+  if (CrossSection)
+    FixupKind = FK_PCRel_4;
+
+  switch (FixupKind) {
   case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_movq_load:
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 6a6814f..642a8ec 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_llvm_library(LLVMObject
   MachOObject.cpp
+  MachOObjectFile.cpp
+  Object.cpp
   ObjectFile.cpp
   COFFObjectFile.cpp
   ELFObjectFile.cpp
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index cfee82a..86bf44b 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -91,6 +91,7 @@ extern char coff_coff_section_layout_static_assert
 namespace {
 class COFFObjectFile : public ObjectFile {
 private:
+        uint64_t         HeaderOff;
   const coff_file_header *Header;
   const coff_section     *SectionTable;
   const coff_symbol      *SymbolTable;
@@ -185,11 +186,8 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
     return ret;
 
   uint32_t Characteristics = 0;
-  uint32_t PointerToRawData = 0;
-  const coff_section *Section = getSection(symb->SectionNumber);
-  if (Section) {
+  if (const coff_section *Section = getSection(symb->SectionNumber)) {
     Characteristics = Section->Characteristics;
-    PointerToRawData = Section->PointerToRawData;
   }
 
   switch (symb->SectionNumber) {
@@ -256,7 +254,7 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
   // Check for string table entry. First byte is '/'.
   if (name[0] == '/') {
     uint32_t Offset;
-    name.getAsInteger(10, Offset);
+    name.substr(1).getAsInteger(10, Offset);
     return StringRef(getString(Offset));
   }
 
@@ -287,9 +285,20 @@ bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
 
 COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
   : ObjectFile(Object) {
-  Header = reinterpret_cast<const coff_file_header *>(base);
+
+  HeaderOff = 0;
+
+  if (base[0] == 0x4d && base[1] == 0x5a) {
+    // PE/COFF, seek through MS-DOS compatibility stub and 4-byte
+    // PE signature to find 'normal' COFF header.
+    HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c);
+    HeaderOff += 4;
+  }
+
+  Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff);
   SectionTable =
     reinterpret_cast<const coff_section *>( base
+                                          + HeaderOff
                                           + sizeof(coff_file_header)
                                           + Header->SizeOfOptionalHeader);
   SymbolTable =
@@ -303,6 +312,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
 
 ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SymbolTable);
   return symbol_iterator(SymbolRef(ret, this));
 }
@@ -310,18 +320,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
 ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
   // The symbol table ends where the string table begins.
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(StringTable);
   return symbol_iterator(SymbolRef(ret, this));
 }
 
 ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SectionTable);
   return section_iterator(SectionRef(ret, this));
 }
 
 ObjectFile::section_iterator COFFObjectFile::end_sections() const {
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
   return section_iterator(SectionRef(ret, this));
 }
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 682be77..d2a2726 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -547,6 +547,7 @@ template<support::endianness target_endianness, bool is64Bits>
 ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
                                           ::begin_sections() const {
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff);
   return section_iterator(SectionRef(ret, this));
 }
@@ -555,6 +556,7 @@ template<support::endianness target_endianness, bool is64Bits>
 ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
                                           ::end_sections() const {
   DataRefImpl ret;
+  memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(base
                                      + Header->e_shoff
                                      + (Header->e_shentsize * Header->e_shnum));
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 5e64d63..9890feb 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -12,6 +12,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -340,3 +342,29 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
                      Index * sizeof(macho::Symbol64TableEntry));
   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
 }
+
+/* ** */
+// Object Dumping Facilities
+void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
+void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
+
+void MachOObject::printHeader(raw_ostream &O) const {
+  O << "('cputype', " << Header.CPUType << ")\n";
+  O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
+  O << "('filetype', " << Header.FileType << ")\n";
+  O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
+  O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
+  O << "('flag', " << Header.Flags << ")\n";
+  
+  // Print extended header if 64-bit.
+  if (is64Bit())
+    O << "('reserved', " << Header64Ext.Reserved << ")\n";
+}
+
+void MachOObject::print(raw_ostream &O) const {
+  O << "Header:\n";
+  printHeader(O);
+  O << "Load Commands:\n";
+  
+  O << "Buffer:\n";
+}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
new file mode 100644
index 0000000..877cbfb
--- /dev/null
+++ b/lib/Object/MachOObjectFile.cpp
@@ -0,0 +1,327 @@
+//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOObjectFile class, which binds the MachOObject
+// class to the generic ObjectFile wrapper.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MachO.h"
+
+#include <cctype>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace object;
+
+namespace llvm {
+
+typedef MachOObject::LoadCommandInfo LoadCommandInfo;
+
+class MachOObjectFile : public ObjectFile {
+public:
+  MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO)
+    : ObjectFile(Object),
+      MachOObj(MOO),
+      RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
+
+  virtual symbol_iterator begin_symbols() const;
+  virtual symbol_iterator end_symbols() const;
+  virtual section_iterator begin_sections() const;
+  virtual section_iterator end_sections() const;
+
+  virtual uint8_t getBytesInAddress() const;
+  virtual StringRef getFileFormatName() const;
+  virtual unsigned getArch() const;
+
+protected:
+  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
+  virtual StringRef getSymbolName(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
+  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
+  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
+
+  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
+  virtual StringRef  getSectionName(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
+  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
+  virtual bool       isSectionText(DataRefImpl Sec) const;
+
+private:
+  MachOObject *MachOObj;
+  mutable uint32_t RegisteredStringTable;
+
+  void moveToNextSection(DataRefImpl &DRI) const;
+  void getSymbolTableEntry(DataRefImpl DRI,
+                           InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+  void moveToNextSymbol(DataRefImpl &DRI) const;
+  void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
+};
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  std::string Err;
+  MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
+  if (!MachOObj)
+    return NULL;
+  return new MachOObjectFile(Buffer, MachOObj);
+}
+
+/*===-- Symbols -----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
+  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+  while (DRI.d.a < LoadCommandCount) {
+    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+    if (LCI.Command.Type == macho::LCT_Symtab) {
+      InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+      MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+      if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
+        return;
+    }
+
+    DRI.d.a++;
+    DRI.d.b = 0;
+  }
+}
+
+void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
+    InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+  if (RegisteredStringTable != DRI.d.a) {
+    MachOObj->RegisterStringTable(*SymtabLoadCmd);
+    RegisteredStringTable = DRI.d.a;
+  }
+
+  MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+                                 Res);
+}
+
+
+SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const {
+  DRI.d.b++;
+  moveToNextSymbol(DRI);
+  return SymbolRef(DRI, this);
+}
+
+StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SymbolTableEntry> Entry;
+  getSymbolTableEntry(DRI, Entry);
+  return MachOObj->getStringAtIndex(Entry->StringIndex);
+}
+
+uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SymbolTableEntry> Entry;
+  getSymbolTableEntry(DRI, Entry);
+  return Entry->Value;
+}
+
+uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const {
+  return UnknownAddressOrSize;
+}
+
+char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SymbolTableEntry> Entry;
+  getSymbolTableEntry(DRI, Entry);
+
+  char Char;
+  switch (Entry->Type & macho::STF_TypeMask) {
+    case macho::STT_Undefined:
+      Char = 'u';
+      break;
+    case macho::STT_Absolute:
+    case macho::STT_Section:
+      Char = 's';
+      break;
+    default:
+      Char = '?';
+      break;
+  }
+
+  if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern))
+    Char = toupper(Char);
+  return Char;
+}
+
+bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SymbolTableEntry> Entry;
+  getSymbolTableEntry(DRI, Entry);
+  return Entry->Flags & macho::STF_StabsEntryMask;
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
+  // DRI.d.a = segment number; DRI.d.b = symbol index.
+  DataRefImpl DRI;
+  DRI.d.a = DRI.d.b = 0;
+  moveToNextSymbol(DRI);
+  return symbol_iterator(SymbolRef(DRI, this));
+}
+
+ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const {
+  DataRefImpl DRI;
+  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+  DRI.d.b = 0;
+  return symbol_iterator(SymbolRef(DRI, this));
+}
+
+
+/*===-- Sections ----------------------------------------------------------===*/
+
+void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
+  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+  while (DRI.d.a < LoadCommandCount) {
+    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+    if (LCI.Command.Type == macho::LCT_Segment) {
+      InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
+      MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
+      if (DRI.d.b < SegmentLoadCmd->NumSections)
+        return;
+    } else if (LCI.Command.Type == macho::LCT_Segment64) {
+      InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
+      MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
+      if (DRI.d.b < Segment64LoadCmd->NumSections)
+        return;
+    }
+
+    DRI.d.a++;
+    DRI.d.b = 0;
+  }
+}
+
+SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const {
+  DRI.d.b++;
+  moveToNextSection(DRI);
+  return SectionRef(DRI, this);
+}
+
+void
+MachOObjectFile::getSection(DataRefImpl DRI,
+                            InMemoryStruct<macho::Section> &Res) const {
+  InMemoryStruct<macho::SegmentLoadCommand> SLC;
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+  MachOObj->ReadSection(LCI, DRI.d.b, Res);
+}
+
+StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SegmentLoadCommand> SLC;
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+  InMemoryStruct<macho::Section> Sect;
+  MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+  static char Result[34];
+  strcpy(Result, SLC->Name);
+  strcat(Result, ",");
+  strcat(Result, Sect->Name);
+  return StringRef(Result);
+}
+
+uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const {
+  InMemoryStruct<macho::Section> Sect;
+  getSection(DRI, Sect);
+  return Sect->Address;
+}
+
+uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const {
+  InMemoryStruct<macho::Section> Sect;
+  getSection(DRI, Sect);
+  return Sect->Size;
+}
+
+StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const {
+  InMemoryStruct<macho::Section> Sect;
+  getSection(DRI, Sect);
+  return MachOObj->getData(Sect->Offset, Sect->Size);
+}
+
+bool MachOObjectFile::isSectionText(DataRefImpl DRI) const {
+  InMemoryStruct<macho::SegmentLoadCommand> SLC;
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+  return !strcmp(SLC->Name, "__TEXT");
+}
+
+ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
+  DataRefImpl DRI;
+  DRI.d.a = DRI.d.b = 0;
+  moveToNextSection(DRI);
+  return section_iterator(SectionRef(DRI, this));
+}
+
+ObjectFile::section_iterator MachOObjectFile::end_sections() const {
+  DataRefImpl DRI;
+  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
+  DRI.d.b = 0;
+  return section_iterator(SectionRef(DRI, this));
+}
+
+/*===-- Miscellaneous -----------------------------------------------------===*/
+
+uint8_t MachOObjectFile::getBytesInAddress() const {
+  return MachOObj->is64Bit() ? 8 : 4;
+}
+
+StringRef MachOObjectFile::getFileFormatName() const {
+  if (!MachOObj->is64Bit()) {
+    switch (MachOObj->getHeader().CPUType) {
+    case llvm::MachO::CPUTypeI386:
+      return "Mach-O 32-bit i386";
+    case llvm::MachO::CPUTypeARM:
+      return "Mach-O arm";
+    case llvm::MachO::CPUTypePowerPC:
+      return "Mach-O 32-bit ppc";
+    default:
+      assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+             "64-bit object file when we're not 64-bit?");
+      return "Mach-O 32-bit unknown";
+    }
+  }
+
+  switch (MachOObj->getHeader().CPUType) {
+  case llvm::MachO::CPUTypeX86_64:
+    return "Mach-O 64-bit x86-64";
+  case llvm::MachO::CPUTypePowerPC64:
+    return "Mach-O 64-bit ppc64";
+  default:
+    assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 &&
+           "32-bit object file when we're 64-bit?");
+    return "Mach-O 64-bit unknown";
+  }
+}
+
+unsigned MachOObjectFile::getArch() const {
+  switch (MachOObj->getHeader().CPUType) {
+  case llvm::MachO::CPUTypeI386:
+    return Triple::x86;
+  case llvm::MachO::CPUTypeX86_64:
+    return Triple::x86_64;
+  case llvm::MachO::CPUTypeARM:
+    return Triple::arm;
+  case llvm::MachO::CPUTypePowerPC:
+    return Triple::ppc;
+  case llvm::MachO::CPUTypePowerPC64:
+    return Triple::ppc64;
+  default:
+    return Triple::UnknownArch;
+  }
+}
+
+} // end namespace llvm
+
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
new file mode 100644
index 0000000..603b23c
--- /dev/null
+++ b/lib/Object/Object.cpp
@@ -0,0 +1,59 @@
+//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the C bindings to the file-format-independent object
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm-c/Object.h"
+
+using namespace llvm;
+using namespace object;
+
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
+  return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
+}
+
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
+  delete unwrap(ObjectFile);
+}
+
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
+  ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections();
+  return wrap(new ObjectFile::section_iterator(SI));
+}
+
+void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
+  delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+                                LLVMSectionIteratorRef SI) {
+  return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
+  // We can't use unwrap() here because the argument to ++ must be an lvalue.
+  ++*reinterpret_cast<ObjectFile::section_iterator*>(SI);
+}
+
+const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
+  return (*unwrap(SI))->getName().data();
+}
+
+uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
+  return (*unwrap(SI))->getSize();
+}
+
+const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
+  return (*unwrap(SI))->getContents().data();
+}
+
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 161ae3a..47b6311 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -55,7 +55,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
     case sys::Mach_O_DynamicLinker_FileType:
     case sys::Mach_O_Bundle_FileType:
     case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-      return 0;
+      return createMachOObjectFile(Object);
     case sys::COFF_FileType:
       return createCOFFObjectFile(Object);
     default:
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index e765ba0..c3169ac 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -726,7 +726,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const {
 }
 
 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
-{
+  : exponent2(0), sign2(0) {
   assertArithmeticOK(ourSemantics);
   initialize(&ourSemantics);
   sign = 0;
@@ -736,14 +736,15 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
   normalize(rmNearestTiesToEven, lfExactlyZero);
 }
 
-APFloat::APFloat(const fltSemantics &ourSemantics) {
+APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
   assertArithmeticOK(ourSemantics);
   initialize(&ourSemantics);
   category = fcZero;
   sign = false;
 }
 
-APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
+  : exponent2(0), sign2(0) {
   assertArithmeticOK(ourSemantics);
   // Allocates storage if necessary but does not initialize it.
   initialize(&ourSemantics);
@@ -751,7 +752,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
 
 APFloat::APFloat(const fltSemantics &ourSemantics,
                  fltCategory ourCategory, bool negative)
-{
+  : exponent2(0), sign2(0) {
   assertArithmeticOK(ourSemantics);
   initialize(&ourSemantics);
   category = ourCategory;
@@ -763,14 +764,13 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
 }
 
 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
-{
+  : exponent2(0), sign2(0) {
   assertArithmeticOK(ourSemantics);
   initialize(&ourSemantics);
   convertFromString(text, rmNearestTiesToEven);
 }
 
-APFloat::APFloat(const APFloat &rhs)
-{
+APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
   initialize(rhs.semantics);
   assign(rhs);
 }
@@ -3257,18 +3257,15 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
   return Val;
 }
 
-APFloat::APFloat(const APInt& api, bool isIEEE)
-{
+APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
   initFromAPInt(api, isIEEE);
 }
 
-APFloat::APFloat(float f)
-{
+APFloat::APFloat(float f) : exponent2(0), sign2(0) {
   initFromAPInt(APInt::floatToBits(f));
 }
 
-APFloat::APFloat(double d)
-{
+APFloat::APFloat(double d) : exponent2(0), sign2(0) {
   initFromAPInt(APInt::doubleToBits(d));
 }
 
@@ -3565,3 +3562,37 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
   for (; I != NDigits; ++I)
     Str.push_back(buffer[NDigits-I-1]);
 }
+
+bool APFloat::getExactInverse(APFloat *inv) const {
+  // We can only guarantee the existence of an exact inverse for IEEE floats.
+  if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
+      semantics != &IEEEdouble && semantics != &IEEEquad)
+    return false;
+
+  // Special floats and denormals have no exact inverse.
+  if (category != fcNormal)
+    return false;
+
+  // Check that the number is a power of two by making sure that only the
+  // integer bit is set in the significand.
+  if (significandLSB() != semantics->precision - 1)
+    return false;
+
+  // Get the inverse.
+  APFloat reciprocal(*semantics, 1ULL);
+  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
+    return false;
+
+  // Avoid multiplication with a denormal, it is not safe on all platforms and
+  // may be slower than a normal division.
+  if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
+    return false;
+
+  assert(reciprocal.category == fcNormal &&
+         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
+
+  if (inv)
+    *inv = reciprocal;
+
+  return true;
+}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 08f36d2..23a22ac 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1517,13 +1517,15 @@ APInt::ms APInt::magic() const {
 /// division by a constant as a sequence of multiplies, adds and shifts.
 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
 /// S. Warren, Jr., chapter 10.
-APInt::mu APInt::magicu() const {
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+APInt::mu APInt::magicu(unsigned LeadingZeros) const {
   const APInt& d = *this;
   unsigned p;
   APInt nc, delta, q1, r1, q2, r2;
   struct mu magu;
   magu.a = 0;               // initialize "add" indicator
-  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
+  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
 
@@ -2076,6 +2078,16 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
   return Res;
 }
 
+APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this * RHS;
+
+  if (*this != 0 && RHS != 0)
+    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
+  else
+    Overflow = false;
+  return Res;
+}
+
 APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
   Overflow = ShAmt >= getBitWidth();
   if (Overflow)
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 5e27df6..215b0f2 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -136,6 +136,14 @@ unsigned BumpPtrAllocator::GetNumSlabs() const {
   return NumSlabs;
 }
 
+size_t BumpPtrAllocator::getTotalMemory() const {
+  size_t TotalMemory = 0;
+  for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
+    TotalMemory += Slab->Size;
+  }
+  return TotalMemory;
+}
+  
 void BumpPtrAllocator::PrintStats() const {
   unsigned NumSlabs = 0;
   size_t TotalMemory = 0;
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 7e74499..7f1c0d3 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -186,12 +186,14 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
 /// have already been stripped.
 static Option *LookupNearestOption(StringRef Arg,
                                    const StringMap<Option*> &OptionsMap,
-                                   const char *&NearestString) {
+                                   std::string &NearestString) {
   // Reject all dashes.
   if (Arg.empty()) return 0;
 
   // Split on any equal sign.
-  StringRef LHS = Arg.split('=').first;
+  std::pair<StringRef, StringRef> SplitArg = Arg.split('=');
+  StringRef &LHS = SplitArg.first;  // LHS == Arg when no '=' is present.
+  StringRef &RHS = SplitArg.second;
 
   // Find the closest match.
   Option *Best = 0;
@@ -204,14 +206,19 @@ static Option *LookupNearestOption(StringRef Arg,
     if (O->ArgStr[0])
       OptionNames.push_back(O->ArgStr);
 
+    bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed;
+    StringRef Flag = PermitValue ? LHS : Arg;
     for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
       StringRef Name = OptionNames[i];
       unsigned Distance = StringRef(Name).edit_distance(
-        Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+        Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
       if (!Best || Distance < BestDistance) {
         Best = O;
-        NearestString = OptionNames[i];
         BestDistance = Distance;
+	if (RHS.empty() || !PermitValue)
+	  NearestString = OptionNames[i];
+	else
+	  NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
       }
     }
   }
@@ -611,7 +618,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
   for (int i = 1; i < argc; ++i) {
     Option *Handler = 0;
     Option *NearestHandler = 0;
-    const char *NearestHandlerString = 0;
+    std::string NearestHandlerString;
     StringRef Value;
     StringRef ArgName = "";
 
@@ -908,8 +915,6 @@ void alias::printOptionInfo(size_t GlobalWidth) const {
   errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
 }
 
-
-
 //===----------------------------------------------------------------------===//
 // Parser Implementation code...
 //
@@ -939,7 +944,11 @@ void basic_parser_impl::printOptionInfo(const Option &O,
   outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
 }
 
-
+void basic_parser_impl::printOptionName(const Option &O,
+                                        size_t GlobalWidth) const {
+  outs() << "  -" << O.ArgStr;
+  outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+}
 
 
 // parser<bool> implementation
@@ -1083,6 +1092,89 @@ void generic_parser_base::printOptionInfo(const Option &O,
   }
 }
 
+static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
+
+// printGenericOptionDiff - Print the value of this option and it's default.
+//
+// "Generic" options have each value mapped to a name.
+void generic_parser_base::
+printGenericOptionDiff(const Option &O, const GenericOptionValue &Value,
+                       const GenericOptionValue &Default,
+                       size_t GlobalWidth) const {
+  outs() << "  -" << O.ArgStr;
+  outs().indent(GlobalWidth-std::strlen(O.ArgStr));
+
+  unsigned NumOpts = getNumOptions();
+  for (unsigned i = 0; i != NumOpts; ++i) {
+    if (Value.compare(getOptionValue(i)))
+      continue;
+
+    outs() << "= " << getOption(i);
+    size_t L = std::strlen(getOption(i));
+    size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
+    outs().indent(NumSpaces) << " (default: ";
+    for (unsigned j = 0; j != NumOpts; ++j) {
+      if (Default.compare(getOptionValue(j)))
+        continue;
+      outs() << getOption(j);
+      break;
+    }
+    outs() << ")\n";
+    return;
+  }
+  outs() << "= *unknown option value*\n";
+}
+
+// printOptionDiff - Specializations for printing basic value types.
+//
+#define PRINT_OPT_DIFF(T)                                               \
+  void parser<T>::                                                      \
+  printOptionDiff(const Option &O, T V, OptionValue<T> D,               \
+                  size_t GlobalWidth) const {                           \
+    printOptionName(O, GlobalWidth);                                    \
+    std::string Str;                                                    \
+    {                                                                   \
+      raw_string_ostream SS(Str);                                       \
+      SS << V;                                                          \
+    }                                                                   \
+    outs() << "= " << Str;                                              \
+    size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;\
+    outs().indent(NumSpaces) << " (default: ";                          \
+    if (D.hasValue())                                                   \
+      outs() << D.getValue();                                           \
+    else                                                                \
+      outs() << "*no default*";                                         \
+    outs() << ")\n";                                                    \
+  }                                                                     \
+
+PRINT_OPT_DIFF(bool)
+PRINT_OPT_DIFF(boolOrDefault)
+PRINT_OPT_DIFF(int)
+PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(double)
+PRINT_OPT_DIFF(float)
+PRINT_OPT_DIFF(char)
+
+void parser<std::string>::
+printOptionDiff(const Option &O, StringRef V, OptionValue<std::string> D,
+                size_t GlobalWidth) const {
+  printOptionName(O, GlobalWidth);
+  outs() << "= " << V;
+  size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0;
+  outs().indent(NumSpaces) << " (default: ";
+  if (D.hasValue())
+    outs() << D.getValue();
+  else
+    outs() << "*no default*";
+  outs() << ")\n";
+}
+
+// Print a placeholder for options that don't yet support printOptionDiff().
+void basic_parser_impl::
+printOptionNoValue(const Option &O, size_t GlobalWidth) const {
+  printOptionName(O, GlobalWidth);
+  outs() << "= *cannot print option value*\n";
+}
 
 //===----------------------------------------------------------------------===//
 // -help and -help-hidden option implementation
@@ -1094,6 +1186,35 @@ static int OptNameCompare(const void *LHS, const void *RHS) {
   return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
 }
 
+// Copy Options into a vector so we can sort them as we like.
+static void
+sortOpts(StringMap<Option*> &OptMap,
+         SmallVectorImpl< std::pair<const char *, Option*> > &Opts,
+         bool ShowHidden) {
+  SmallPtrSet<Option*, 128> OptionSet;  // Duplicate option detection.
+
+  for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
+       I != E; ++I) {
+    // Ignore really-hidden options.
+    if (I->second->getOptionHiddenFlag() == ReallyHidden)
+      continue;
+
+    // Unless showhidden is set, ignore hidden flags.
+    if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
+      continue;
+
+    // If we've already seen this option, don't add it to the list again.
+    if (!OptionSet.insert(I->second))
+      continue;
+
+    Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
+                                                    I->second));
+  }
+
+  // Sort the options list alphabetically.
+  qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+}
+
 namespace {
 
 class HelpPrinter {
@@ -1115,30 +1236,8 @@ public:
     StringMap<Option*> OptMap;
     GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
 
-    // Copy Options into a vector so we can sort them as we like.
     SmallVector<std::pair<const char *, Option*>, 128> Opts;
-    SmallPtrSet<Option*, 128> OptionSet;  // Duplicate option detection.
-
-    for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end();
-         I != E; ++I) {
-      // Ignore really-hidden options.
-      if (I->second->getOptionHiddenFlag() == ReallyHidden)
-        continue;
-
-      // Unless showhidden is set, ignore hidden flags.
-      if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden)
-        continue;
-
-      // If we've already seen this option, don't add it to the list again.
-      if (!OptionSet.insert(I->second))
-        continue;
-
-      Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
-                                                      I->second));
-    }
-
-    // Sort the options list alphabetically.
-    qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+    sortOpts(OptMap, Opts, ShowHidden);
 
     if (ProgramOverview)
       outs() << "OVERVIEW: " << ProgramOverview << "\n";
@@ -1197,6 +1296,38 @@ static cl::opt<HelpPrinter, true, parser<bool> >
 HHOp("help-hidden", cl::desc("Display all available options"),
      cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
 
+static cl::opt<bool>
+PrintOptions("print-options",
+             cl::desc("Print non-default options after command line parsing"),
+             cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+PrintAllOptions("print-all-options",
+                cl::desc("Print all option values after command line parsing"),
+                cl::Hidden, cl::init(false));
+
+// Print the value of each option.
+void cl::PrintOptionValues() {
+  if (!PrintOptions && !PrintAllOptions) return;
+
+  // Get all the options.
+  SmallVector<Option*, 4> PositionalOpts;
+  SmallVector<Option*, 4> SinkOpts;
+  StringMap<Option*> OptMap;
+  GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
+
+  SmallVector<std::pair<const char *, Option*>, 128> Opts;
+  sortOpts(OptMap, Opts, /*ShowHidden*/true);
+
+  // Compute the maximum argument length...
+  size_t MaxArgLen = 0;
+  for (size_t i = 0, e = Opts.size(); i != e; ++i)
+    MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
+
+  for (size_t i = 0, e = Opts.size(); i != e; ++i)
+    Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
+}
+
 static void (*OverrideVersionPrinter)() = 0;
 
 static int TargetArraySortFn(const void *LHS, const void *RHS) {
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index bf8ca3f..899c389 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -57,12 +57,36 @@ public:
 static sys::Mutex gCrashRecoveryContexMutex;
 static bool gCrashRecoveryEnabled = false;
 
+static sys::ThreadLocal<const CrashRecoveryContextCleanup> 
+       tlIsRecoveringFromCrash;
+
+CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+
 CrashRecoveryContext::~CrashRecoveryContext() {
+  // Reclaim registered resources.
+  CrashRecoveryContextCleanup *i = head;
+  tlIsRecoveringFromCrash.set(head);
+  while (i) {
+    CrashRecoveryContextCleanup *tmp = i;
+    i = tmp->next;
+    tmp->cleanupFired = true;
+    tmp->recoverResources();
+    delete tmp;
+  }
+  tlIsRecoveringFromCrash.erase();
+  
   CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
   delete CRCI;
 }
 
+bool CrashRecoveryContext::isRecoveringFromCrash() {
+  return tlIsRecoveringFromCrash.get() != 0;
+}
+
 CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+  if (!gCrashRecoveryEnabled)
+    return 0;
+
   const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
   if (!CRCI)
     return 0;
@@ -70,6 +94,33 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
   return CRCI->CRC;
 }
 
+void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup)
+{
+  if (!cleanup)
+    return;
+  if (head)
+    head->prev = cleanup;
+  cleanup->next = head;
+  head = cleanup;
+}
+
+void
+CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
+  if (!cleanup)
+    return;
+  if (cleanup == head) {
+    head = cleanup->next;
+    if (head)
+      head->prev = 0;
+  }
+  else {
+    cleanup->prev->next = cleanup->next;
+    if (cleanup->next)
+      cleanup->next->prev = cleanup->prev;
+  }
+  delete cleanup;
+}
+
 #ifdef LLVM_ON_WIN32
 
 // FIXME: No real Win32 implementation currently.
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 9799ef5..74a9fda 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -203,6 +203,10 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
   case DW_AT_APPLE_major_runtime_vers:   return "DW_AT_APPLE_major_runtime_vers";
   case DW_AT_APPLE_runtime_class:        return "DW_AT_APPLE_runtime_class";
   case DW_AT_APPLE_omit_frame_ptr:       return "DW_AT_APPLE_omit_frame_ptr";
+  case DW_AT_APPLE_property_name:        return "DW_AT_APPLE_property_name";
+  case DW_AT_APPLE_property_getter:      return "DW_AT_APPLE_property_getter";
+  case DW_AT_APPLE_property_setter:      return "DW_AT_APPLE_property_setter";
+  case DW_AT_APPLE_property_attribute:   return "DW_AT_APPLE_property_attribute";
   }
   return 0;
 }
@@ -391,6 +395,7 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
   case DW_OP_call_ref:                   return "DW_OP_call_ref";
   case DW_OP_form_tls_address:           return "DW_OP_form_tls_address";
   case DW_OP_call_frame_cfa:             return "DW_OP_call_frame_cfa";
+  case DW_OP_bit_piece:                  return "DW_OP_bit_piece";
   case DW_OP_lo_user:                    return "DW_OP_lo_user";
   case DW_OP_hi_user:                    return "DW_OP_hi_user";
   }
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 3579546..e6cc57d 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -32,7 +32,6 @@
 #endif
 
 using namespace llvm;
-using namespace std;
 
 static fatal_error_handler_t ErrorHandler = 0;
 static void *ErrorHandlerUserData = 0;
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 5dbabee..4c8c0c6 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -198,7 +198,7 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
     return 1;
   }
 
-  // Now its safe to mmap the files into memory becasue both files
+  // Now its safe to mmap the files into memory because both files
   // have a non-zero size.
   error_code ec;
   OwningPtr<MemoryBuffer> F1;
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index a4f80a9..d2e35b8 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -147,6 +147,11 @@ void FoldingSetNodeID::AddString(StringRef String) {
   Bits.push_back(V);
 }
 
+// AddNodeID - Adds the Bit data of another ID to *this.
+void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
+  Bits.append(ID.Bits.begin(), ID.Bits.end());
+}
+
 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 
 /// lookup the node in the FoldingSetImpl.
 unsigned FoldingSetNodeID::ComputeHash() const {
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 4dacf96..911c64a 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -214,6 +214,8 @@ std::string sys::getHostCPUName() {
                // As found in a Summer 2010 model iMac.
       case 37: // Intel Core i7, laptop version.
         return "corei7";
+      case 42: // SandyBridge
+        return "sandybridge";
 
       case 28: // Intel Atom processor. All processors are manufactured using
                // the 45 nm process
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index a0c650d..e2b5b7a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -46,8 +46,10 @@ MemoryBuffer::~MemoryBuffer() { }
 
 /// init - Initialize this MemoryBuffer as a reference to externally allocated
 /// memory, memory that we know is already null terminated.
-void MemoryBuffer::init(const char *BufStart, const char *BufEnd) {
-  assert(BufEnd[0] == 0 && "Buffer is not null terminated!");
+void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
+                        bool RequiresNullTerminator) {
+  assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
+         "Buffer is not null terminated!");
   BufferStart = BufStart;
   BufferEnd = BufEnd;
 }
@@ -65,32 +67,39 @@ static void CopyStringRef(char *Memory, StringRef Data) {
 
 /// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
 template <typename T>
-static T* GetNamedBuffer(StringRef Buffer, StringRef Name) {
+static T* GetNamedBuffer(StringRef Buffer, StringRef Name,
+                         bool RequiresNullTerminator) {
   char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
   CopyStringRef(Mem + sizeof(T), Name);
-  return new (Mem) T(Buffer);
+  return new (Mem) T(Buffer, RequiresNullTerminator);
 }
 
 namespace {
 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
 class MemoryBufferMem : public MemoryBuffer {
 public:
-  MemoryBufferMem(StringRef InputData) {
-    init(InputData.begin(), InputData.end());
+  MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
+    init(InputData.begin(), InputData.end(), RequiresNullTerminator);
   }
 
   virtual const char *getBufferIdentifier() const {
      // The name is stored after the class itself.
     return reinterpret_cast<const char*>(this + 1);
   }
+  
+  virtual BufferKind getBufferKind() const {
+    return MemoryBuffer_Malloc;
+  }
 };
 }
 
 /// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
 /// that EndPtr[0] must be a null byte and be accessible!
 MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
-                                         StringRef BufferName) {
-  return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName);
+                                         StringRef BufferName,
+                                         bool RequiresNullTerminator) {
+  return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
+                                         RequiresNullTerminator);
 }
 
 /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
@@ -127,7 +136,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
   char *Buf = Mem + AlignedStringLen;
   Buf[Size] = 0; // Null terminate buffer.
 
-  return new (Mem) MemoryBufferMem(StringRef(Buf, Size));
+  return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
 }
 
 /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
@@ -172,26 +181,41 @@ namespace {
 /// sys::Path::UnMapFilePages method.
 class MemoryBufferMMapFile : public MemoryBufferMem {
 public:
-  MemoryBufferMMapFile(StringRef Buffer)
-    : MemoryBufferMem(Buffer) { }
+  MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
+    : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
 
   ~MemoryBufferMMapFile() {
-    sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
+    static int PageSize = sys::Process::GetPageSize();
+
+    uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
+    size_t Size = getBufferSize();
+    uintptr_t RealStart = Start & ~(PageSize - 1);
+    size_t RealSize = Size + (Start - RealStart);
+
+    sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
+                              RealSize);
+  }
+  
+  virtual BufferKind getBufferKind() const {
+    return MemoryBuffer_MMap;
   }
 };
 }
 
 error_code MemoryBuffer::getFile(StringRef Filename,
                                  OwningPtr<MemoryBuffer> &result,
-                                 int64_t FileSize) {
+                                 int64_t FileSize,
+                                 bool RequiresNullTerminator) {
   // Ensure the path is null terminated.
   SmallString<256> PathBuf(Filename.begin(), Filename.end());
-  return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize);
+  return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize,
+                               RequiresNullTerminator);
 }
 
 error_code MemoryBuffer::getFile(const char *Filename,
                                  OwningPtr<MemoryBuffer> &result,
-                                 int64_t FileSize) {
+                                 int64_t FileSize,
+                                 bool RequiresNullTerminator) {
   int OpenFlags = O_RDONLY;
 #ifdef O_BINARY
   OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
@@ -200,17 +224,32 @@ error_code MemoryBuffer::getFile(const char *Filename,
   if (FD == -1) {
     return error_code(errno, posix_category());
   }
-  error_code ret = getOpenFile(FD, Filename, result, FileSize);
+  error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize,
+                               0, RequiresNullTerminator);
   close(FD);
   return ret;
 }
 
-error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
-                                     OwningPtr<MemoryBuffer> &result,
-                                     int64_t FileSize) {
+static bool shouldUseMmap(int FD,
+                          size_t FileSize,
+                          size_t MapSize,
+                          off_t Offset,
+                          bool RequiresNullTerminator,
+                          int PageSize) {
+  // We don't use mmap for small files because this can severely fragment our
+  // address space.
+  if (MapSize < 4096*4)
+    return false;
+
+  if (!RequiresNullTerminator)
+    return true;
+
+
   // If we don't know the file size, use fstat to find out.  fstat on an open
   // file descriptor is cheaper than stat on a random path.
-  if (FileSize == -1) {
+  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
+  // RequiresNullTerminator = false and MapSize != -1.
+  if (FileSize == size_t(-1)) {
     struct stat FileInfo;
     // TODO: This should use fstat64 when available.
     if (fstat(FD, &FileInfo) == -1) {
@@ -219,23 +258,59 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
     FileSize = FileInfo.st_size;
   }
 
+  // If we need a null terminator and the end of the map is inside the file,
+  // we cannot use mmap.
+  size_t End = Offset + MapSize;
+  assert(End <= FileSize);
+  if (End != FileSize)
+    return false;
 
-  // If the file is large, try to use mmap to read it in.  We don't use mmap
-  // for small files, because this can severely fragment our address space. Also
-  // don't try to map files that are exactly a multiple of the system page size,
-  // as the file would not have the required null terminator.
-  //
-  // FIXME: Can we just mmap an extra page in the latter case?
-  if (FileSize >= 4096*4 &&
-      (FileSize & (sys::Process::GetPageSize()-1)) != 0) {
-    if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
+  // Don't try to map files that are exactly a multiple of the system page size
+  // if we need a null terminator.
+  if ((FileSize & (PageSize -1)) == 0)
+    return false;
+
+  return true;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+                                     OwningPtr<MemoryBuffer> &result,
+                                     size_t FileSize, size_t MapSize,
+                                     off_t Offset,
+                                     bool RequiresNullTerminator) {
+  static int PageSize = sys::Process::GetPageSize();
+
+  // Default is to map the full file.
+  if (MapSize == size_t(-1)) {
+    // If we don't know the file size, use fstat to find out.  fstat on an open
+    // file descriptor is cheaper than stat on a random path.
+    if (FileSize == size_t(-1)) {
+      struct stat FileInfo;
+      // TODO: This should use fstat64 when available.
+      if (fstat(FD, &FileInfo) == -1) {
+        return error_code(errno, posix_category());
+      }
+      FileSize = FileInfo.st_size;
+    }
+    MapSize = FileSize;
+  }
+
+  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
+                    PageSize)) {
+    off_t RealMapOffset = Offset & ~(PageSize - 1);
+    off_t Delta = Offset - RealMapOffset;
+    size_t RealMapSize = MapSize + Delta;
+
+    if (const char *Pages = sys::Path::MapInFilePages(FD,
+                                                      RealMapSize,
+                                                      RealMapOffset)) {
       result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
-        StringRef(Pages, FileSize), Filename));
+          StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
       return success;
     }
   }
 
-  MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename);
+  MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
   if (!Buf) {
     // Failed to create a buffer. The only way it can fail is if
     // new(std::nothrow) returns 0.
@@ -245,7 +320,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
   OwningPtr<MemoryBuffer> SB(Buf);
   char *BufPtr = const_cast<char*>(SB->getBufferStart());
 
-  size_t BytesLeft = FileSize;
+  size_t BytesLeft = MapSize;
+  if (lseek(FD, Offset, SEEK_SET) == -1)
+    return error_code(errno, posix_category());
+
   while (BytesLeft) {
     ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
     if (NumRead == -1) {
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index e5e875b..8fbaf2d 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -15,11 +15,15 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
 #include <cassert>
 #include <cstring>
 #include <ostream>
 using namespace llvm;
 using namespace sys;
+namespace {
+using support::ulittle32_t;
+}
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
@@ -88,15 +92,21 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
       }
       break;
 
+      // The two magic numbers for mach-o are:
+      // 0xfeedface - 32-bit mach-o
+      // 0xfeedfacf - 64-bit mach-o
     case 0xFE:
-    case 0xCE: {
+    case 0xCE:
+    case 0xCF: {
       uint16_t type = 0;
       if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
-          magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
+          magic[2] == char(0xFA) && 
+          (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
         /* Native endian */
         if (length >= 16) type = magic[14] << 8 | magic[15];
-      } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
-                 magic[2] == char(0xED) && magic[3] == char(0xFE)) {
+      } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+                 magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+                 magic[3] == char(0xFE)) {
         /* Reverse endian */
         if (length >= 14) type = magic[13] << 8 | magic[12];
       }
@@ -129,6 +139,16 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
       if (magic[1] == 0x02)
         return COFF_FileType;
       break;
+
+    case 0x4d: // Possible MS-DOS stub on Windows PE file
+      if (magic[1] == 0x5a) {
+        uint32_t off = *reinterpret_cast<const ulittle32_t *>(magic + 0x3c);
+        // PE/COFF file, either EXE or DLL.
+        if (off < length && memcmp(magic + off, "PE\0\0",4) == 0)
+          return COFF_FileType;
+      }
+      break;
+
     case 0x64: // x86-64 Windows.
       if (magic[1] == char(0x86))
         return COFF_FileType;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index a9f4709..082b701 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines some helpful functions for dealing with the possibility of
-// Unix signals occuring while your program is running.
+// Unix signals occurring while your program is running.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 309ffb0..d293da0 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -82,7 +82,7 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
         Matches->push_back(StringRef());
         continue;
       }
-      assert(pm[i].rm_eo > pm[i].rm_so);
+      assert(pm[i].rm_eo >= pm[i].rm_so);
       Matches->push_back(StringRef(String.data()+pm[i].rm_so,
                                    pm[i].rm_eo-pm[i].rm_so));
     }
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
index a3af37d..a117893 100644
--- a/lib/Support/Signals.cpp
+++ b/lib/Support/Signals.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines some helpful functions for dealing with the possibility of
-// Unix signals occuring while your program is running.
+// Unix signals occurring while your program is running.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 504e649..997ce0b 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -52,10 +52,14 @@ bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
     // Otherwise, hit the big set case, which will call grow.
   }
   
-  // If more than 3/4 of the array is full, grow.
-  if (NumElements*4 >= CurArraySize*3 ||
-      CurArraySize-(NumElements+NumTombstones) < CurArraySize/8)
-    Grow();
+  if (NumElements*4 >= CurArraySize*3) {
+    // If more than 3/4 of the array is full, grow.
+    Grow(CurArraySize < 64 ? 128 : CurArraySize*2);
+  } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) {
+    // If fewer of 1/8 of the array is empty (meaning that many are filled with
+    // tombstones), rehash.
+    Grow(CurArraySize);
+  }
   
   // Okay, we know we have space.  Find a hash bucket.
   const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
@@ -125,10 +129,9 @@ const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
 
 /// Grow - Allocate a larger backing store for the buckets and move it over.
 ///
-void SmallPtrSetImpl::Grow() {
+void SmallPtrSetImpl::Grow(unsigned NewSize) {
   // Allocate at twice as many buckets, but at least 128.
   unsigned OldSize = CurArraySize;
-  unsigned NewSize = OldSize < 64 ? 128 : OldSize*2;
   
   const void **OldBuckets = CurArray;
   bool WasSmall = isSmall();
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index f0ed626..1e733d9 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -101,6 +101,10 @@ void llvm::EnableStatistics() {
   Enabled.setValue(true);
 }
 
+bool llvm::AreStatisticsEnabled() {
+  return Enabled;
+}
+
 void llvm::PrintStatistics(raw_ostream &OS) {
   StatisticInfo &Stats = *StatInfo;
 
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index 90ec299..a1ac512 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -169,6 +169,8 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
   TheTable[Bucket].Item = getTombstoneVal();
   --NumItems;
   ++NumTombstones;
+  assert(NumItems + NumTombstones <= NumBuckets);
+
   return Result;
 }
 
@@ -177,7 +179,19 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
 /// RehashTable - Grow the table, redistributing values into the buckets with
 /// the appropriate mod-of-hashtable-size.
 void StringMapImpl::RehashTable() {
-  unsigned NewSize = NumBuckets*2;
+  unsigned NewSize;
+
+  // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
+  // the buckets are empty (meaning that many are filled with tombstones),
+  // grow/rehash the table.
+  if (NumItems*4 > NumBuckets*3) {
+    NewSize = NumBuckets*2;
+  } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) {
+    NewSize = NumBuckets;
+  } else {
+    return;
+  }
+
   // Allocate one extra bucket which will always be non-empty.  This allows the
   // iterators to stop at end.
   ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
@@ -212,4 +226,5 @@ void StringMapImpl::RehashTable() {
   
   TheTable = NewTableArray;
   NumBuckets = NewSize;
+  NumTombstones = 0;
 }
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 5398051..8c3fc09 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -131,7 +131,7 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
 
 /// find - Search for the first string \arg Str in the string.
 ///
-/// \return - The index of the first occurence of \arg Str, or npos if not
+/// \return - The index of the first occurrence of \arg Str, or npos if not
 /// found.
 size_t StringRef::find(StringRef Str, size_t From) const {
   size_t N = Str.size();
@@ -145,7 +145,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
 
 /// rfind - Search for the last string \arg Str in the string.
 ///
-/// \return - The index of the last occurence of \arg Str, or npos if not
+/// \return - The index of the last occurrence of \arg Str, or npos if not
 /// found.
 size_t StringRef::rfind(StringRef Str) const {
   size_t N = Str.size();
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 36edf6e..dbdb303 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -41,7 +41,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case x86_64:  return "x86_64";
   case xcore:   return "xcore";
   case mblaze:  return "mblaze";
-  case ptx:     return "ptx";
+  case ptx32:   return "ptx32";
+  case ptx64:   return "ptx64";
   }
 
   return "<invalid>";
@@ -74,7 +75,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
 
   case xcore:   return "xcore";
 
-  case ptx:     return "ptx";
+  case ptx32:   return "ptx";
+  case ptx64:   return "ptx";
   }
 }
 
@@ -84,6 +86,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
 
   case Apple: return "apple";
   case PC: return "pc";
+  case SCEI: return "scei";
   }
 
   return "<invalid>";
@@ -98,8 +101,10 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case Darwin: return "darwin";
   case DragonFly: return "dragonfly";
   case FreeBSD: return "freebsd";
+  case IOS: return "ios";
   case Linux: return "linux";
   case Lv2: return "lv2";
+  case MacOSX: return "macosx";
   case MinGW32: return "mingw32";
   case NetBSD: return "netbsd";
   case OpenBSD: return "openbsd";
@@ -162,8 +167,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     return x86_64;
   if (Name == "xcore")
     return xcore;
-  if (Name == "ptx")
-    return ptx;
+  if (Name == "ptx32")
+    return ptx32;
+  if (Name == "ptx64")
+    return ptx64;
 
   return UnknownArch;
 }
@@ -202,15 +209,17 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
       Str == "armv6" || Str == "armv7")
     return Triple::arm;
 
-  if (Str == "ptx")
-    return Triple::ptx;
+  if (Str == "ptx32")
+    return Triple::ptx32;
+  if (Str == "ptx64")
+    return Triple::ptx64;
 
   return Triple::UnknownArch;
 }
 
 // Returns architecture name that is understood by the target assembler.
 const char *Triple::getArchNameForAssembler() {
-  if (getOS() != Triple::Darwin && getVendor() != Triple::Apple)
+  if (!isOSDarwin() && getVendor() != Triple::Apple)
     return NULL;
 
   StringRef Str = getArchName();
@@ -235,8 +244,10 @@ const char *Triple::getArchNameForAssembler() {
     return "armv6";
   if (Str == "armv7" || Str == "thumbv7")
     return "armv7";
-  if (Str == "ptx")
-    return "ptx";
+  if (Str == "ptx32")
+    return "ptx32";
+  if (Str == "ptx64")
+    return "ptx64";
   return NULL;
 }
 
@@ -285,8 +296,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
     return tce;
   else if (ArchName == "xcore")
     return xcore;
-  else if (ArchName == "ptx")
-    return ptx;
+  else if (ArchName == "ptx32")
+    return ptx32;
+  else if (ArchName == "ptx64")
+    return ptx64;
   else
     return UnknownArch;
 }
@@ -296,6 +309,8 @@ Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
     return Apple;
   else if (VendorName == "pc")
     return PC;
+  else if (VendorName == "scei")
+    return SCEI;
   else
     return UnknownVendor;
 }
@@ -311,10 +326,14 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
     return DragonFly;
   else if (OSName.startswith("freebsd"))
     return FreeBSD;
+  else if (OSName.startswith("ios"))
+    return IOS;
   else if (OSName.startswith("linux"))
     return Linux;
   else if (OSName.startswith("lv2"))
     return Lv2;
+  else if (OSName.startswith("macosx"))
+    return MacOSX;
   else if (OSName.startswith("mingw32"))
     return MinGW32;
   else if (OSName.startswith("netbsd"))
@@ -523,67 +542,44 @@ StringRef Triple::getOSAndEnvironmentName() const {
 
 static unsigned EatNumber(StringRef &Str) {
   assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
-  unsigned Result = Str[0]-'0';
+  unsigned Result = 0;
 
-  // Eat the digit.
-  Str = Str.substr(1);
-
-  // Handle "darwin11".
-  if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') {
+  do {
+    // Consume the leading digit.
     Result = Result*10 + (Str[0] - '0');
+
     // Eat the digit.
     Str = Str.substr(1);
-  }
+  } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9');
 
   return Result;
 }
 
-/// getDarwinNumber - Parse the 'darwin number' out of the specific target
-/// triple.  For example, if we have darwin8.5 return 8,5,0.  If any entry is
-/// not defined, return 0's.  This requires that the triple have an OSType of
-/// darwin before it is called.
-void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
-                             unsigned &Revision) const {
-  assert(getOS() == Darwin && "Not a darwin target triple!");
+void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
+                          unsigned &Micro) const {
   StringRef OSName = getOSName();
-  assert(OSName.startswith("darwin") && "Unknown darwin target triple!");
-
-  // Strip off "darwin".
-  OSName = OSName.substr(6);
-
-  Maj = Min = Revision = 0;
-
-  if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
-    return;
 
-  // The major version is the first digit.
-  Maj = EatNumber(OSName);
-  if (OSName.empty()) return;
+  // Assume that the OS portion of the triple starts with the canonical name.
+  StringRef OSTypeName = getOSTypeName(getOS());
+  if (OSName.startswith(OSTypeName))
+    OSName = OSName.substr(OSTypeName.size());
 
-  // Handle minor version: 10.4.9 -> darwin8.9.
-  if (OSName[0] != '.')
-    return;
+  // Any unset version defaults to 0.
+  Major = Minor = Micro = 0;
 
-  // Eat the '.'.
-  OSName = OSName.substr(1);
-
-  if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
-    return;
-
-  Min = EatNumber(OSName);
-  if (OSName.empty()) return;
-
-  // Handle revision darwin8.9.1
-  if (OSName[0] != '.')
-    return;
-
-  // Eat the '.'.
-  OSName = OSName.substr(1);
+  // Parse up to three components.
+  unsigned *Components[3] = { &Major, &Minor, &Micro };
+  for (unsigned i = 0; i != 3; ++i) {
+    if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
+      break;
 
-  if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
-    return;
+    // Consume the leading number.
+    *Components[i] = EatNumber(OSName);
 
-  Revision = EatNumber(OSName);
+    // Consume the separator, if present.
+    if (OSName.startswith("."))
+      OSName = OSName.substr(1);
+  }
 }
 
 void Triple::setTriple(const Twine &Str) {
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index ed74b67..8cbec8c 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -87,10 +87,7 @@ std::string sys::getHostTriple() {
   std::string::size_type DarwinDashIdx = Triple.find("-darwin");
   if (DarwinDashIdx != std::string::npos) {
     Triple.resize(DarwinDashIdx + strlen("-darwin"));
-
-    // Only add the major part of the os version.
-    std::string Version = getOSVersion();
-    Triple += Version.substr(0, Version.find('.'));
+    Triple += getOSVersion();
   }
 
   return Triple;
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 4312d67..5a57a28 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -124,7 +124,7 @@ bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
     (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
   return KERN_SUCCESS == kr;
 #else
-  return false;
+  return true;
 #endif
 }
 
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 0f6e800..430cf2e 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -869,18 +869,18 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
   return false;
 }
 
-const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
   int Flags = MAP_PRIVATE;
 #ifdef MAP_FILE
   Flags |= MAP_FILE;
 #endif
-  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
+  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset);
   if (BasePtr == MAP_FAILED)
     return 0;
   return (const char*)BasePtr;
 }
 
-void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
+void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) {
   ::munmap((void*)BasePtr, FileSize);
 }
 
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 1104bc7..9f0a9ef 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -132,7 +132,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
 
 #ifdef HAVE_POSIX_SPAWN
 static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
-                          posix_spawn_file_actions_t &FileActions) {
+                          posix_spawn_file_actions_t *FileActions) {
   if (Path == 0) // Noop
     return false;
   const char *File;
@@ -142,7 +142,7 @@ static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
   else
     File = Path->c_str();
 
-  if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD,
+  if (int Err = posix_spawn_file_actions_addopen(FileActions, FD,
                             File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
     return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
   return false;
@@ -185,10 +185,13 @@ Program::Execute(const Path &path, const char **args, const char **envp,
   // posix_spawn.  It is more efficient than fork/exec.
 #ifdef HAVE_POSIX_SPAWN
   if (memoryLimit == 0) {
-    posix_spawn_file_actions_t FileActions;
-    posix_spawn_file_actions_init(&FileActions);
+    posix_spawn_file_actions_t FileActionsStore;
+    posix_spawn_file_actions_t *FileActions = 0;
 
     if (redirects) {
+      FileActions = &FileActionsStore;
+      posix_spawn_file_actions_init(FileActions);
+
       // Redirect stdin/stdout.
       if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
           RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
@@ -200,7 +203,7 @@ Program::Execute(const Path &path, const char **args, const char **envp,
       } else {
         // If stdout and stderr should go to the same place, redirect stderr
         // to the FD already open for stdout.
-        if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
+        if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2))
           return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
       }
     }
@@ -216,10 +219,11 @@ Program::Execute(const Path &path, const char **args, const char **envp,
     // Explicitly initialized to prevent what appears to be a valgrind false
     // positive.
     pid_t PID = 0;
-    int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
+    int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0,
                           const_cast<char **>(args), const_cast<char **>(envp));
 
-    posix_spawn_file_actions_destroy(&FileActions);
+    if (FileActions)
+      posix_spawn_file_actions_destroy(FileActions);
 
     if (Err)
      return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
@@ -232,7 +236,7 @@ Program::Execute(const Path &path, const char **args, const char **envp,
   // Create a child process.
   int child = fork();
   switch (child) {
-    // An error occured:  Return to the caller.
+    // An error occurred:  Return to the caller.
     case -1:
       MakeErrMsg(ErrMsg, "Couldn't fork");
       return false;
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 0a61759..e286869 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines some helpful functions for dealing with the possibility of
-// Unix signals occuring while your program is running.
+// Unix signals occurring while your program is running.
 //
 //===----------------------------------------------------------------------===//
 
@@ -274,6 +274,9 @@ void llvm::sys::PrintStackTraceOnErrorSignal() {
 
 #ifdef __APPLE__
 
+#include <signal.h>
+#include <pthread.h>
+
 int raise(int sig) {
   return pthread_kill(pthread_self(), sig);
 }
@@ -291,9 +294,6 @@ void __assert_rtn(const char *func,
   abort();
 }
 
-#include <signal.h>
-#include <pthread.h>
-
 void abort() {
   raise(SIGABRT);
   usleep(1000);
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 2c14366..4227844 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -41,41 +41,12 @@ using namespace sys;
 
 static std::vector<HMODULE> OpenedHandles;
 
-#ifdef _WIN64
-  typedef DWORD64 ModuleBaseType;
-#else
-  typedef ULONG ModuleBaseType;
-#endif
-
 extern "C" {
-// Use old callback if:
-//  - Not using Visual Studio
-//  - Visual Studio 2005 or earlier but only if we are not using the Windows SDK
-//    or Windows SDK version is older than 6.0
-// Use new callback if:
-//  - Newer Visual Studio (comes with newer SDK).
-//  - Visual Studio 2005 with Windows SDK 6.0+
-#if defined(_MSC_VER)
-  #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
-    #define OLD_ELM_CALLBACK_DECL 1
-  #endif
-#elif defined(__MINGW64__)
-  // Use new callback.
-#elif defined(__MINGW32__)
-  #define OLD_ELM_CALLBACK_DECL 1
-#endif
 
-#ifdef OLD_ELM_CALLBACK_DECL
-  static BOOL CALLBACK ELM_Callback(PSTR  ModuleName,
-                                    ModuleBaseType ModuleBase,
+  static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName,
+                                    ULONG_PTR ModuleBase,
                                     ULONG ModuleSize,
                                     PVOID UserContext)
-#else
-  static BOOL CALLBACK ELM_Callback(PCSTR  ModuleName,
-                                    ModuleBaseType ModuleBase,
-                                    ULONG ModuleSize,
-                                    PVOID UserContext)
-#endif
   {
     // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
     // into the process.
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 625f67a..42a92f9 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -882,7 +882,17 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
   // Find a numeric suffix that isn't used by an existing file.  Assume there
   // won't be more than 1 million files with the same prefix.  Probably a safe
   // bet.
-  static unsigned FCounter = 0;
+  static int FCounter = -1;
+  if (FCounter < 0) {
+    // Give arbitrary initial seed.
+    // FIXME: We should use sys::fs::unique_file() in future.
+    LARGE_INTEGER cnt64;
+    DWORD x = GetCurrentProcessId();
+    x = (x << 16) | (x >> 16);
+    if (QueryPerformanceCounter(&cnt64))    // RDTSC
+      x ^= cnt64.HighPart ^ cnt64.LowPart;
+    FCounter = x % 1000000;
+  }
   do {
     sprintf(FNBuffer+offset, "-%06u", FCounter);
     if (++FCounter > 999999)
@@ -908,12 +918,12 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
 }
 
 /// MapInFilePages - Not yet implemented on win32.
-const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) {
   return 0;
 }
 
 /// MapInFilePages - Not yet implemented on win32.
-void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
+void Path::UnMapFilePages(const char *Base, size_t FileSize) {
   assert(0 && "NOT IMPLEMENTED");
 }
 
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 8effb0c..af71b73 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -449,7 +449,14 @@ error_code status(const Twine &path, file_status &result) {
   SmallString<128> path_storage;
   SmallVector<wchar_t, 128> path_utf16;
 
-  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+  StringRef path8 = path.toStringRef(path_storage);
+  // FIXME: We should detect as many "special file name" as possible.
+  if (path8.compare_lower("nul") == 0) {
+    result = file_status(file_type::character_file);
+    return success;
+  }
+
+  if (error_code ec = UTF8ToUTF16(path8,
                                   path_utf16))
     return ec;
 
@@ -649,7 +656,7 @@ error_code get_magic(const Twine &path, uint32_t len,
   ::CloseHandle(file);
   if (!read_success || (bytes_read != len)) {
     // Set result size to the number of bytes read if it's valid.
-    if (bytes_read >= 0 && bytes_read <= len)
+    if (bytes_read <= len)
       result.set_size(bytes_read);
     // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
     return ec;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 80ea740..5a71fa3 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -220,6 +220,36 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
 }
 
 raw_ostream &raw_ostream::operator<<(double N) {
+#ifdef _WIN32
+  // On MSVCRT and compatible, output of %e is incompatible to Posix
+  // by default. Number of exponent digits should be at least 2. "%+03d"
+  // FIXME: Implement our formatter to here or Support/Format.h!
+  int fpcl = _fpclass(N);
+
+  // negative zero
+  if (fpcl == _FPCLASS_NZ)
+    return *this << "-0.000000e+00";
+
+  char buf[16];
+  unsigned len;
+  len = snprintf(buf, sizeof(buf), "%e", N);
+  if (len <= sizeof(buf) - 2) {
+    if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
+      int cs = buf[len - 4];
+      if (cs == '+' || cs == '-') {
+        int c1 = buf[len - 2];
+        int c0 = buf[len - 1];
+        if (isdigit(c1) && isdigit(c0)) {
+          // Trim leading '0': "...e+012" -> "...e+12\0"
+          buf[len - 3] = c1;
+          buf[len - 2] = c0;
+          buf[--len] = 0;
+        }
+      }
+    }
+    return this->operator<<(buf);
+  }
+#endif
   return this->operator<<(format("%e", N));
 }
 
@@ -265,15 +295,23 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
       return write(Ptr, Size);
     }
 
-    // Write out the data in buffer-sized blocks until the remainder
-    // fits within the buffer.
-    do {
-      size_t NumBytes = OutBufEnd - OutBufCur;
-      copy_to_buffer(Ptr, NumBytes);
-      flush_nonempty();
-      Ptr += NumBytes;
-      Size -= NumBytes;
-    } while (OutBufCur+Size > OutBufEnd);
+    size_t NumBytes = OutBufEnd - OutBufCur;
+
+    // If the buffer is empty at this point we have a string that is larger
+    // than the buffer. Directly write the chunk that is a multiple of the
+    // preferred buffer size and put the remainder in the buffer.
+    if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) {
+      size_t BytesToWrite = Size - (Size % NumBytes);
+      write_impl(Ptr, BytesToWrite);
+      copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+      return *this;
+    }
+
+    // We don't have enough space in the buffer to fit the string in. Insert as
+    // much as possible, flush and start over with the remainder.
+    copy_to_buffer(Ptr, NumBytes);
+    flush_nonempty();
+    return write(Ptr + NumBytes, Size - NumBytes);
   }
 
   copy_to_buffer(Ptr, Size);
@@ -458,6 +496,14 @@ raw_fd_ostream::~raw_fd_ostream() {
         }
   }
 
+#ifdef __MINGW32__
+  // On mingw, global dtors should not call exit().
+  // report_fatal_error() invokes exit(). We know report_fatal_error()
+  // might not write messages to stderr when any errors were detected
+  // on FD == 2.
+  if (FD == 2) return;
+#endif
+
   // If there are any pending errors, report them now. Clients wishing
   // to avoid report_fatal_error calls should check for errors with
   // has_error() and clear the error flag with clear_error() before
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index cd018d5..46c91a9 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -780,7 +780,7 @@ p_b_cclass(struct parse *p, cset *cs)
 	const char *u;
 	char c;
 
-	while (MORE() && isalpha(PEEK()))
+	while (MORE() && isalpha((uch)PEEK()))
 		NEXT();
 	len = p->next - sp;
 	for (cp = cclasses; cp->name != NULL; cp++)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index bf4315f..6af5f85 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -51,6 +51,12 @@ def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
 // to just not use them.
 def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
                                          "Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+                                       "HasVMLxForwarding", "true",
+                                       "Has multiplier accumulator forwarding">;
+
 // Some processors benefit from using NEON instructions for scalar
 // single-precision FP operations.
 def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -61,6 +67,14 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
+/// Some instructions update CPSR partially, which can add false dependency for
+/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
+/// mapped to a separate physical register. Avoid partial CPSR update for these
+/// processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+                                               "AvoidCPSRPartialUpdate", "true",
+                                 "Avoid CPSR partial update for OOO execution">;
+
 // Multiprocessing extension.
 def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
                                  "Supports Multiprocessing extension">;
@@ -100,11 +114,13 @@ def ProcOthers  : SubtargetFeature<"others", "ARMProcFamily", "Others",
 def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
                                    "Cortex-A8 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureNEONForFP,
-                                    FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+                                    FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+                                    FeatureT2XtPk]>;
 def ProcA9      : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
                                    "Cortex-A9 ARM processors",
-                                   [FeatureHasSlowFPVMLx, FeatureT2XtPk,
-                                    FeatureFP16]>;
+                                   [FeatureVMLxForwarding,
+                                    FeatureT2XtPk, FeatureFP16,
+                                    FeatureAvoidPartialCPSR]>;
 
 class ProcNoItin<string Name, list<SubtargetFeature> Features>
  : Processor<Name, GenericItineraries, Features>;
@@ -171,6 +187,8 @@ def : Processor<"cortex-a8",        CortexA8Itineraries,
                                     [ArchV7A, ProcA8]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
                                     [ArchV7A, ProcA9]>;
+def : Processor<"cortex-a9-mp",     CortexA9Itineraries,
+                                    [ArchV7A, ProcA9, FeatureMP]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 19fbf05..595708f 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -408,16 +408,18 @@ namespace ARM_AM {
   //
   // The first operand is always a Reg.  The second operand is a reg if in
   // reg/reg form, otherwise it's reg#0.  The third field encodes the operation
-  // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+  // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+  // fourth operand 16-17 encodes the index mode.
   //
   // If this addressing mode is a frame index (before prolog/epilog insertion
   // and code rewriting), this operand will have the form:  FI#, reg0, <offs>
   // with no shift amount for the frame offset.
   //
-  static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+  static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+                                   unsigned IdxMode = 0) {
     assert(Imm12 < (1 << 12) && "Imm too large!");
     bool isSub = Opc == sub;
-    return Imm12 | ((int)isSub << 12) | (SO << 13);
+    return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
   }
   static inline unsigned getAM2Offset(unsigned AM2Opc) {
     return AM2Opc & ((1 << 12)-1);
@@ -426,7 +428,10 @@ namespace ARM_AM {
     return ((AM2Opc >> 12) & 1) ? sub : add;
   }
   static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
-    return (ShiftOpc)(AM2Opc >> 13);
+    return (ShiftOpc)((AM2Opc >> 13) & 7);
+  }
+  static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+    return (AM2Opc >> 16);
   }
 
 
@@ -441,12 +446,14 @@ namespace ARM_AM {
   //
   // The first operand is always a Reg.  The second operand is a reg if in
   // reg/reg form, otherwise it's reg#0.  The third field encodes the operation
-  // in bit 8, the immediate in bits 0-7.
+  // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
+  // index mode.
 
   /// getAM3Opc - This function encodes the addrmode3 opc field.
-  static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+  static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+                                   unsigned IdxMode = 0) {
     bool isSub = Opc == sub;
-    return ((int)isSub << 8) | Offset;
+    return ((int)isSub << 8) | Offset | (IdxMode << 9);
   }
   static inline unsigned char getAM3Offset(unsigned AM3Opc) {
     return AM3Opc & 0xFF;
@@ -454,6 +461,9 @@ namespace ARM_AM {
   static inline AddrOpc getAM3Op(unsigned AM3Opc) {
     return ((AM3Opc >> 8) & 1) ? sub : add;
   }
+  static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+    return (AM3Opc >> 9);
+  }
 
   //===--------------------------------------------------------------------===//
   // Addressing Mode #4
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index ec23449..f062819 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -246,7 +246,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
     }
 
     uint32_t out = (opc << 21);
-    out |= (Value & 0x800) << 14;
+    out |= (Value & 0x800) << 15;
     out |= (Value & 0x700) << 4;
     out |= (Value & 0x0FF);
 
@@ -416,21 +416,22 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
 // FIXME: This should be in a separate file.
 class DarwinARMAsmBackend : public ARMAsmBackend {
 public:
-  DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { }
-
-  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-                  uint64_t Value) const;
+  const object::mach::CPUSubtypeARM Subtype;
+  DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
+    : ARMAsmBackend(T), Subtype(st) { }
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    // FIXME: Subtarget info should be derived. Force v7 for now.
     return createMachObjectWriter(new ARMMachObjectWriter(
                                     /*Is64Bit=*/false,
                                     object::mach::CTM_ARM,
-                                    object::mach::CSARM_V7),
+                                    Subtype),
                                   OS,
                                   /*IsLittleEndian=*/true);
   }
 
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const;
+
   virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
     return false;
   }
@@ -499,14 +500,17 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
 
 TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
                                             const std::string &TT) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
-    return new DarwinARMAsmBackend(T);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
-    assert(0 && "Windows not supported on ARM");
-  default:
-    return new ELFARMAsmBackend(T, Triple(TT).getOS());
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin()) {
+    if (TheTriple.getArchName() == "armv6" ||
+        TheTriple.getArchName() == "thumbv6")
+      return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
+    return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
   }
+
+  if (TheTriple.isOSWindows())
+    assert(0 && "Windows not supported on ARM");
+
+  return new ELFARMAsmBackend(T, Triple(TT).getOS());
 }
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index db12b8e..c428e18 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -88,6 +88,11 @@ namespace {
       case ARMBuildAttrs::CPU_name:
         Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
         break;
+      /* GAS requires .fpu to be emitted regardless of EABI attribute */
+      case ARMBuildAttrs::Advanced_SIMD_arch:
+      case ARMBuildAttrs::VFP_arch:
+        Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+        break;    
       default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
       }
     }
@@ -167,6 +172,117 @@ getDebugValueLocation(const MachineInstr *MI) const {
   return Location;
 }
 
+/// getDwarfRegOpSize - get size required to emit given machine location using
+/// dwarf encoding.
+unsigned ARMAsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+    return AsmPrinter::getDwarfRegOpSize(MLoc);
+  else {
+    unsigned Reg = MLoc.getReg();
+    if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+      assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+      // S registers are described as bit-pieces of a register
+      // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+      // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+      
+      unsigned SReg = Reg - ARM::S0;
+      unsigned Rx = 256 + (SReg >> 1);
+      OutStreamer.AddComment("Loc expr size");
+      // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB
+      //   1 + ULEB(Rx) + 1 + 1 + 1
+      return 4 + MCAsmInfo::getULEB128Size(Rx);
+    } 
+    
+    if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+      assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+      // Q registers Q0-Q15 are described by composing two D registers together.
+      // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+      unsigned QReg = Reg - ARM::Q0;
+      unsigned D1 = 256 + 2 * QReg;
+      unsigned D2 = D1 + 1;
+      
+      OutStreamer.AddComment("Loc expr size");
+      // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) +
+      // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8);
+      //   6 + ULEB(D1) + ULEB(D2)
+      return 6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2);
+    }
+  }
+  return 0;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+    AsmPrinter::EmitDwarfRegOp(MLoc);
+  else {
+    unsigned Reg = MLoc.getReg();
+    if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+      assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+      // S registers are described as bit-pieces of a register
+      // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+      // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+      
+      unsigned SReg = Reg - ARM::S0;
+      bool odd = SReg & 0x1;
+      unsigned Rx = 256 + (SReg >> 1);
+      OutStreamer.AddComment("Loc expr size");
+      // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB
+      //   1 + ULEB(Rx) + 1 + 1 + 1
+      EmitInt16(4 + MCAsmInfo::getULEB128Size(Rx));
+
+      OutStreamer.AddComment("DW_OP_regx for S register");
+      EmitInt8(dwarf::DW_OP_regx);
+
+      OutStreamer.AddComment(Twine(SReg));
+      EmitULEB128(Rx);
+
+      if (odd) {
+        OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+        EmitInt8(dwarf::DW_OP_bit_piece);
+        EmitULEB128(32);
+        EmitULEB128(32);
+      } else {
+        OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+        EmitInt8(dwarf::DW_OP_bit_piece);
+        EmitULEB128(32);
+        EmitULEB128(0);
+      }
+    } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+      assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+      // Q registers Q0-Q15 are described by composing two D registers together.
+      // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+      unsigned QReg = Reg - ARM::Q0;
+      unsigned D1 = 256 + 2 * QReg;
+      unsigned D2 = D1 + 1;
+      
+      OutStreamer.AddComment("Loc expr size");
+      // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) +
+      // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8);
+      //   6 + ULEB(D1) + ULEB(D2)
+      EmitInt16(6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2));
+
+      OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+      EmitInt8(dwarf::DW_OP_regx);
+      EmitULEB128(D1);
+      OutStreamer.AddComment("DW_OP_piece 8");
+      EmitInt8(dwarf::DW_OP_piece);
+      EmitULEB128(8);
+
+      OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+      EmitInt8(dwarf::DW_OP_regx);
+      EmitULEB128(D2);
+      OutStreamer.AddComment("DW_OP_piece 8");
+      EmitInt8(dwarf::DW_OP_piece);
+      EmitULEB128(8);
+    }
+  }
+}
+
 void ARMAsmPrinter::EmitFunctionEntryLabel() {
   if (AFI->isThumbFunction()) {
     OutStreamer.EmitAssemblerFlag(MCAF_Code16);
@@ -453,10 +569,13 @@ void ARMAsmPrinter::emitAttributes() {
 
   emitARMAttributeSection();
 
+  /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
+  bool emitFPU = false;
   AttributeEmitter *AttrEmitter;
-  if (OutStreamer.hasRawTextSupport())
+  if (OutStreamer.hasRawTextSupport()) {
     AttrEmitter = new AsmAttributeEmitter(OutStreamer);
-  else {
+    emitFPU = true;
+  } else {
     MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
     AttrEmitter = new ObjectAttributeEmitter(O);
   }
@@ -490,10 +609,36 @@ void ARMAsmPrinter::emitAttributes() {
                                ARMBuildAttrs::Allowed);
   }
 
-  // FIXME: Emit FPU type
-  if (Subtarget->hasVFP2())
+  if (Subtarget->hasNEON() && emitFPU) {
+    /* NEON is not exactly a VFP architecture, but GAS emit one of
+     * neon/vfpv3/vfpv2 for .fpu parameters */
+    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+    /* If emitted for NEON, omit from VFP below, since you can have both
+     * NEON and VFP in build attributes but only one .fpu */
+    emitFPU = false;
+  }
+
+  /* VFPv3 + .fpu */
+  if (Subtarget->hasVFP3()) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+                               ARMBuildAttrs::AllowFPv3A);
+    if (emitFPU)
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+  /* VFPv2 + .fpu */
+  } else if (Subtarget->hasVFP2()) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
                                ARMBuildAttrs::AllowFPv2);
+    if (emitFPU)
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+  }
+
+  /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+   * since NEON can have 1 (allowed) or 2 (fused MAC operations) */
+  if (Subtarget->hasNEON()) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+                               ARMBuildAttrs::Allowed);
+  }
 
   // Signal various FP modes.
   if (!UnsafeFPMath) {
@@ -777,10 +922,161 @@ void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
   OutStreamer.EmitInstruction(TmpInst);
 }
 
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+  assert(MI->getFlag(MachineInstr::FrameSetup) &&
+      "Only instruction which are involved into frame setup code are allowed");
+
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned Opc = MI->getOpcode();
+  unsigned SrcReg, DstReg;
+
+  if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+    // Two special cases:
+    // 1) tPUSH does not have src/dst regs.
+    // 2) for Thumb1 code we sometimes materialize the constant via constpool
+    // load. Yes, this is pretty fragile, but for now I don't see better
+    // way... :(
+    SrcReg = DstReg = ARM::SP;
+  } else {
+    SrcReg = MI->getOperand(1).getReg();
+    DstReg = MI->getOperand(0).getReg();
+  }
+
+  // Try to figure out the unwinding opcode out of src / dst regs.
+  if (MI->getDesc().mayStore()) {
+    // Register saves.
+    assert(DstReg == ARM::SP &&
+           "Only stack pointer as a destination reg is supported");
+
+    SmallVector<unsigned, 4> RegList;
+    // Skip src & dst reg, and pred ops.
+    unsigned StartOp = 2 + 2;
+    // Use all the operands.
+    unsigned NumOffset = 0;
+
+    switch (Opc) {
+    default:
+      MI->dump();
+      assert(0 && "Unsupported opcode for unwinding information");
+    case ARM::tPUSH:
+      // Special case here: no src & dst reg, but two extra imp ops.
+      StartOp = 2; NumOffset = 2;
+    case ARM::STMDB_UPD:
+    case ARM::t2STMDB_UPD:
+    case ARM::VSTMDDB_UPD:
+      assert(SrcReg == ARM::SP &&
+             "Only stack pointer as a source reg is supported");
+      for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+           i != NumOps; ++i)
+        RegList.push_back(MI->getOperand(i).getReg());
+      break;
+    case ARM::STR_PRE:
+      assert(MI->getOperand(2).getReg() == ARM::SP &&
+             "Only stack pointer as a source reg is supported");
+      RegList.push_back(SrcReg);
+      break;
+    }
+    OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+  } else {
+    // Changes of stack / frame pointer.
+    if (SrcReg == ARM::SP) {
+      int64_t Offset = 0;
+      switch (Opc) {
+      default:
+        MI->dump();
+        assert(0 && "Unsupported opcode for unwinding information");
+      case ARM::MOVr:
+      case ARM::tMOVgpr2gpr:
+      case ARM::tMOVgpr2tgpr:
+        Offset = 0;
+        break;
+      case ARM::ADDri:
+        Offset = -MI->getOperand(2).getImm();
+        break;
+      case ARM::SUBri:
+      case ARM::t2SUBrSPi:
+        Offset =  MI->getOperand(2).getImm();
+        break;
+      case ARM::tSUBspi:
+        Offset =  MI->getOperand(2).getImm()*4;
+        break;
+      case ARM::tADDspi:
+      case ARM::tADDrSPi:
+        Offset = -MI->getOperand(2).getImm()*4;
+        break;
+      case ARM::tLDRpci: {
+        // Grab the constpool index and check, whether it corresponds to
+        // original or cloned constpool entry.
+        unsigned CPI = MI->getOperand(1).getIndex();
+        const MachineConstantPool *MCP = MF.getConstantPool();
+        if (CPI >= MCP->getConstants().size())
+          CPI = AFI.getOriginalCPIdx(CPI);
+        assert(CPI != -1U && "Invalid constpool index");
+
+        // Derive the actual offset.
+        const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+        assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+        // FIXME: Check for user, it should be "add" instruction!
+        Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+        break;
+      }
+      }
+
+      if (DstReg == FramePtr && FramePtr != ARM::SP)
+        // Set-up of the frame pointer. Positive values correspond to "add"
+        // instruction.
+        OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+      else if (DstReg == ARM::SP) {
+        // Change of SP by an offset. Positive values correspond to "sub"
+        // instruction.
+        OutStreamer.EmitPad(Offset);
+      } else {
+        MI->dump();
+        assert(0 && "Unsupported opcode for unwinding information");
+      }
+    } else if (DstReg == ARM::SP) {
+      // FIXME: .movsp goes here
+      MI->dump();
+      assert(0 && "Unsupported opcode for unwinding information");
+    }
+    else {
+      MI->dump();
+      assert(0 && "Unsupported opcode for unwinding information");
+    }
+  }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
 void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
   default: break;
+  case ARM::B: {
+    // B is just a Bcc with an 'always' predicate.
+    MCInst TmpInst;
+    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+    TmpInst.setOpcode(ARM::Bcc);
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::LDMIA_RET: {
+    // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as
+    // such has additional code-gen properties and scheduling information.
+    // To emit it, we just construct as normal and set the opcode to LDMIA_UPD.
+    MCInst TmpInst;
+    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+    TmpInst.setOpcode(ARM::LDMIA_UPD);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
   case ARM::t2ADDrSPi:
   case ARM::t2ADDrSPi12:
   case ARM::t2SUBrSPi:
@@ -850,6 +1146,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     OutStreamer.EmitInstruction(TmpInst);
     return;
   }
+  // Darwin call instructions are just normal call instructions with different
+  // clobber semantics (they clobber R9).
+  case ARM::BLr9:
+  case ARM::BLr9_pred:
+  case ARM::BLXr9:
+  case ARM::BLXr9_pred: {
+    unsigned newOpc;
+    switch (Opc) {
+    default: assert(0);
+    case ARM::BLr9:       newOpc = ARM::BL; break;
+    case ARM::BLr9_pred:  newOpc = ARM::BL_pred; break;
+    case ARM::BLXr9:      newOpc = ARM::BLX; break;
+    case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break;
+    }
+    MCInst TmpInst;
+    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+    TmpInst.setOpcode(newOpc);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
   case ARM::BXr9_CALL:
   case ARM::BX_CALL: {
     {
@@ -1502,6 +1818,49 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
+  // Tail jump branches are really just branch instructions with additional
+  // code-gen attributes. Convert them to the canonical form here.
+  case ARM::TAILJMPd:
+  case ARM::TAILJMPdND: {
+    MCInst TmpInst, TmpInst2;
+    // Lower the instruction as-is to get the operands properly converted.
+    LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
+    TmpInst.setOpcode(ARM::Bcc);
+    TmpInst.addOperand(TmpInst2.getOperand(0));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.AddComment("TAILCALL");
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::tTAILJMPd:
+  case ARM::tTAILJMPdND: {
+    MCInst TmpInst, TmpInst2;
+    LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
+    TmpInst.setOpcode(ARM::tB);
+    TmpInst.addOperand(TmpInst2.getOperand(0));
+    OutStreamer.AddComment("TAILCALL");
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::TAILJMPrND:
+  case ARM::tTAILJMPrND:
+  case ARM::TAILJMPr:
+  case ARM::tTAILJMPr: {
+    unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND)
+      ? ARM::BX : ARM::tBX;
+    MCInst TmpInst;
+    TmpInst.setOpcode(newOpc);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Predicate.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.AddComment("TAILCALL");
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+
   // These are the pseudos created to comply with stricter operand restrictions
   // on ARMv5. Lower them now to "normal" instructions, since all the
   // restrictions are already satisfied.
@@ -1530,6 +1889,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
   MCInst TmpInst;
   LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+  // Emit unwinding stuff for frame-related instructions
+  if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+    EmitUnwindingInstruction(MI);
+
   OutStreamer.EmitInstruction(TmpInst);
 }
 
@@ -1538,10 +1902,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 //===----------------------------------------------------------------------===//
 
 static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new ARMInstPrinter(MAI);
+    return new ARMInstPrinter(TM, MAI);
   return 0;
 }
 
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 5852684..1ee1b70 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -82,11 +82,20 @@ private:
   // Generic helper used to emit e.g. ARMv5 mul pseudos
   void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
 
+  void EmitUnwindingInstruction(const MachineInstr *MI);
+
 public:
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
 
   MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
 
+  /// getDwarfRegOpSize - get size required to emit given machine location
+  /// using dwarf encoding.
+  virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const;
+
+  /// EmitDwarfRegOp - Emit dwarf register operation.
+  virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
   virtual unsigned getISAEncoding() {
     // ARM/Darwin adds ISA to the DWARF info for each function.
     if (!Subtarget->isTargetDarwin())
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
index a56cc1a..36edbad 100644
--- a/lib/Target/ARM/ARMBaseInfo.h
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -200,6 +200,59 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
 }
 
 namespace ARMII {
+
+  /// ARM Index Modes
+  enum IndexMode {
+    IndexModeNone  = 0,
+    IndexModePre   = 1,
+    IndexModePost  = 2,
+    IndexModeUpd   = 3
+  };
+
+  /// ARM Addressing Modes
+  enum AddrMode {
+    AddrModeNone    = 0,
+    AddrMode1       = 1,
+    AddrMode2       = 2,
+    AddrMode3       = 3,
+    AddrMode4       = 4,
+    AddrMode5       = 5,
+    AddrMode6       = 6,
+    AddrModeT1_1    = 7,
+    AddrModeT1_2    = 8,
+    AddrModeT1_4    = 9,
+    AddrModeT1_s    = 10, // i8 * 4 for pc and sp relative data
+    AddrModeT2_i12  = 11,
+    AddrModeT2_i8   = 12,
+    AddrModeT2_so   = 13,
+    AddrModeT2_pc   = 14, // +/- i12 for pc relative data
+    AddrModeT2_i8s4 = 15, // i8 * 4
+    AddrMode_i12    = 16
+  };
+
+  inline static const char *AddrModeToString(AddrMode addrmode) {
+    switch (addrmode) {
+    default: llvm_unreachable("Unknown memory operation");
+    case AddrModeNone:    return "AddrModeNone";
+    case AddrMode1:       return "AddrMode1";
+    case AddrMode2:       return "AddrMode2";
+    case AddrMode3:       return "AddrMode3";
+    case AddrMode4:       return "AddrMode4";
+    case AddrMode5:       return "AddrMode5";
+    case AddrMode6:       return "AddrMode6";
+    case AddrModeT1_1:    return "AddrModeT1_1";
+    case AddrModeT1_2:    return "AddrModeT1_2";
+    case AddrModeT1_4:    return "AddrModeT1_4";
+    case AddrModeT1_s:    return "AddrModeT1_s";
+    case AddrModeT2_i12:  return "AddrModeT2_i12";
+    case AddrModeT2_i8:   return "AddrModeT2_i8";
+    case AddrModeT2_so:   return "AddrModeT2_so";
+    case AddrModeT2_pc:   return "AddrModeT2_pc";
+    case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+    case AddrMode_i12:    return "AddrMode_i12";
+    }
+  }
+
   /// Target Operand Flag enum.
   enum TOF {
     //===------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2268e59..44a3976 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1021,7 +1021,7 @@ reMaterialize(MachineBasicBlock &MBB,
     MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                       DestReg)
       .addConstantPoolIndex(CPI).addImm(PCLabelId);
-    (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
     break;
   }
   }
@@ -1080,11 +1080,18 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
     int CPI1 = MO1.getIndex();
     const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
     const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
-    ARMConstantPoolValue *ACPV0 =
-      static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
-    ARMConstantPoolValue *ACPV1 =
-      static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
-    return ACPV0->hasSameValue(ACPV1);
+    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+    if (isARMCP0 && isARMCP1) {
+      ARMConstantPoolValue *ACPV0 =
+        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+      ARMConstantPoolValue *ACPV1 =
+        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+      return ACPV0->hasSameValue(ACPV1);
+    } else if (!isARMCP0 && !isARMCP1) {
+      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+    }
+    return false;
   } else if (Opcode == ARM::PICLDR) {
     if (MI1->getOpcode() != Opcode)
       return false;
@@ -1194,7 +1201,7 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
 }
 
 /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
 /// be scheduled togther. On some targets if two loads are loading from
 /// addresses in the same cache line, it's better if they are scheduled
 /// together. This function takes two integers that represent the load offsets
@@ -1263,19 +1270,19 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
 }
 
 bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                           unsigned NumCyles,
+                                           unsigned NumCycles,
                                            unsigned ExtraPredCycles,
                                            float Probability,
                                            float Confidence) const {
-  if (!NumCyles)
+  if (!NumCycles)
     return false;
 
   // Attempt to estimate the relative costs of predication versus branching.
-  float UnpredCost = Probability * NumCyles;
+  float UnpredCost = Probability * NumCycles;
   UnpredCost += 1.0; // The branch itself
   UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
 
-  return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
+  return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
 }
 
 bool ARMBaseInstrInfo::
@@ -1328,7 +1335,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                unsigned DestReg, unsigned BaseReg, int NumBytes,
                                ARMCC::CondCodes Pred, unsigned PredReg,
-                               const ARMBaseInstrInfo &TII) {
+                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
   bool isSub = NumBytes < 0;
   if (isSub) NumBytes = -NumBytes;
 
@@ -1346,7 +1353,8 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
       .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
-      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+      .setMIFlags(MIFlags);
     BaseReg = DestReg;
   }
 }
@@ -1610,18 +1618,84 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
   // Set the "zero" bit in CPSR.
   switch (MI->getOpcode()) {
   default: break;
+  case ARM::RSBrr:
+  case ARM::RSBri:
+  case ARM::RSCrr:
+  case ARM::RSCri:
+  case ARM::ADDrr:
   case ARM::ADDri:
-  case ARM::ANDri:
-  case ARM::t2ANDri:
+  case ARM::ADCrr:
+  case ARM::ADCri:
+  case ARM::SUBrr:
   case ARM::SUBri:
+  case ARM::SBCrr:
+  case ARM::SBCri:
+  case ARM::t2RSBri:
+  case ARM::t2ADDrr:
   case ARM::t2ADDri:
+  case ARM::t2ADCrr:
+  case ARM::t2ADCri:
+  case ARM::t2SUBrr:
   case ARM::t2SUBri:
+  case ARM::t2SBCrr:
+  case ARM::t2SBCri:
+  case ARM::ANDrr:
+  case ARM::ANDri:
+  case ARM::t2ANDrr:
+  case ARM::t2ANDri:
+  case ARM::ORRrr:
+  case ARM::ORRri:
+  case ARM::t2ORRrr:
+  case ARM::t2ORRri:
+  case ARM::EORrr:
+  case ARM::EORri:
+  case ARM::t2EORrr:
+  case ARM::t2EORri: {
+    // Scan forward for the use of CPSR, if it's a conditional code requires
+    // checking of V bit, then this is not safe to do. If we can't find the
+    // CPSR use (i.e. used in another block), then it's not safe to perform
+    // the optimization.
+    bool isSafe = false;
+    I = CmpInstr;
+    E = MI->getParent()->end();
+    while (!isSafe && ++I != E) {
+      const MachineInstr &Instr = *I;
+      for (unsigned IO = 0, EO = Instr.getNumOperands();
+           !isSafe && IO != EO; ++IO) {
+        const MachineOperand &MO = Instr.getOperand(IO);
+        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+          continue;
+        if (MO.isDef()) {
+          isSafe = true;
+          break;
+        }
+        // Condition code is after the operand before CPSR.
+        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+        switch (CC) {
+        default:
+          isSafe = true;
+          break;
+        case ARMCC::VS:
+        case ARMCC::VC:
+        case ARMCC::GE:
+        case ARMCC::LT:
+        case ARMCC::GT:
+        case ARMCC::LE:
+          return false;
+        }
+      }
+    }
+
+    if (!isSafe)
+      return false;
+
     // Toggle the optional operand to CPSR.
     MI->getOperand(5).setReg(ARM::CPSR);
     MI->getOperand(5).setIsDef(true);
     CmpInstr->eraseFromParent();
     return true;
   }
+  }
 
   return false;
 }
@@ -1741,9 +1815,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
     llvm_unreachable("Unexpected multi-uops instruction!");
     break;
   case ARM::VLDMQIA:
-  case ARM::VLDMQDB:
   case ARM::VSTMQIA:
-  case ARM::VSTMQDB:
     return 2;
 
   // The number of uOps for load / store multiple are determined by the number
@@ -1757,19 +1829,15 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
   case ARM::VLDMDIA:
-  case ARM::VLDMDDB:
   case ARM::VLDMDIA_UPD:
   case ARM::VLDMDDB_UPD:
   case ARM::VLDMSIA:
-  case ARM::VLDMSDB:
   case ARM::VLDMSIA_UPD:
   case ARM::VLDMSDB_UPD:
   case ARM::VSTMDIA:
-  case ARM::VSTMDDB:
   case ARM::VSTMDIA_UPD:
   case ARM::VSTMDDB_UPD:
   case ARM::VSTMSIA:
-  case ARM::VSTMSDB:
   case ARM::VSTMSIA_UPD:
   case ARM::VSTMSDB_UPD: {
     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
@@ -1859,7 +1927,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
     switch (DefTID.getOpcode()) {
     default: break;
     case ARM::VLDMSIA:
-    case ARM::VLDMSDB:
     case ARM::VLDMSIA_UPD:
     case ARM::VLDMSDB_UPD:
       isSLoad = true;
@@ -1935,7 +2002,6 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
     switch (UseTID.getOpcode()) {
     default: break;
     case ARM::VSTMSIA:
-    case ARM::VSTMSDB:
     case ARM::VSTMSIA_UPD:
     case ARM::VSTMSDB_UPD:
       isSStore = true;
@@ -2006,11 +2072,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     break;
 
   case ARM::VLDMDIA:
-  case ARM::VLDMDDB:
   case ARM::VLDMDIA_UPD:
   case ARM::VLDMDDB_UPD:
   case ARM::VLDMSIA:
-  case ARM::VLDMSDB:
   case ARM::VLDMSIA_UPD:
   case ARM::VLDMSDB_UPD:
     DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
@@ -2049,11 +2113,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     break;
 
   case ARM::VSTMDIA:
-  case ARM::VSTMDDB:
   case ARM::VSTMDIA_UPD:
   case ARM::VSTMDDB_UPD:
   case ARM::VSTMSIA:
-  case ARM::VSTMSDB:
   case ARM::VSTMSIA_UPD:
   case ARM::VSTMSDB_UPD:
     UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
@@ -2160,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     }
   }
 
+  if (DefAlign < 8 && Subtarget.isCortexA9())
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::VLD1q8:
+    case ARM::VLD1q16:
+    case ARM::VLD1q32:
+    case ARM::VLD1q64:
+    case ARM::VLD1q8_UPD:
+    case ARM::VLD1q16_UPD:
+    case ARM::VLD1q32_UPD:
+    case ARM::VLD1q64_UPD:
+    case ARM::VLD2d8:
+    case ARM::VLD2d16:
+    case ARM::VLD2d32:
+    case ARM::VLD2q8:
+    case ARM::VLD2q16:
+    case ARM::VLD2q32:
+    case ARM::VLD2d8_UPD:
+    case ARM::VLD2d16_UPD:
+    case ARM::VLD2d32_UPD:
+    case ARM::VLD2q8_UPD:
+    case ARM::VLD2q16_UPD:
+    case ARM::VLD2q32_UPD:
+    case ARM::VLD3d8:
+    case ARM::VLD3d16:
+    case ARM::VLD3d32:
+    case ARM::VLD1d64T:
+    case ARM::VLD3d8_UPD:
+    case ARM::VLD3d16_UPD:
+    case ARM::VLD3d32_UPD:
+    case ARM::VLD1d64T_UPD:
+    case ARM::VLD3q8_UPD:
+    case ARM::VLD3q16_UPD:
+    case ARM::VLD3q32_UPD:
+    case ARM::VLD4d8:
+    case ARM::VLD4d16:
+    case ARM::VLD4d32:
+    case ARM::VLD1d64Q:
+    case ARM::VLD4d8_UPD:
+    case ARM::VLD4d16_UPD:
+    case ARM::VLD4d32_UPD:
+    case ARM::VLD1d64Q_UPD:
+    case ARM::VLD4q8_UPD:
+    case ARM::VLD4q16_UPD:
+    case ARM::VLD4q32_UPD:
+    case ARM::VLD1DUPq8:
+    case ARM::VLD1DUPq16:
+    case ARM::VLD1DUPq32:
+    case ARM::VLD1DUPq8_UPD:
+    case ARM::VLD1DUPq16_UPD:
+    case ARM::VLD1DUPq32_UPD:
+    case ARM::VLD2DUPd8:
+    case ARM::VLD2DUPd16:
+    case ARM::VLD2DUPd32:
+    case ARM::VLD2DUPd8_UPD:
+    case ARM::VLD2DUPd16_UPD:
+    case ARM::VLD2DUPd32_UPD:
+    case ARM::VLD4DUPd8:
+    case ARM::VLD4DUPd16:
+    case ARM::VLD4DUPd32:
+    case ARM::VLD4DUPd8_UPD:
+    case ARM::VLD4DUPd16_UPD:
+    case ARM::VLD4DUPd32_UPD:
+    case ARM::VLD1LNd8:
+    case ARM::VLD1LNd16:
+    case ARM::VLD1LNd32:
+    case ARM::VLD1LNd8_UPD:
+    case ARM::VLD1LNd16_UPD:
+    case ARM::VLD1LNd32_UPD:
+    case ARM::VLD2LNd8:
+    case ARM::VLD2LNd16:
+    case ARM::VLD2LNd32:
+    case ARM::VLD2LNq16:
+    case ARM::VLD2LNq32:
+    case ARM::VLD2LNd8_UPD:
+    case ARM::VLD2LNd16_UPD:
+    case ARM::VLD2LNd32_UPD:
+    case ARM::VLD2LNq16_UPD:
+    case ARM::VLD2LNq32_UPD:
+    case ARM::VLD4LNd8:
+    case ARM::VLD4LNd16:
+    case ARM::VLD4LNd32:
+    case ARM::VLD4LNq16:
+    case ARM::VLD4LNq32:
+    case ARM::VLD4LNd8_UPD:
+    case ARM::VLD4LNd16_UPD:
+    case ARM::VLD4LNd32_UPD:
+    case ARM::VLD4LNq16_UPD:
+    case ARM::VLD4LNq32_UPD:
+      // If the address is not 64-bit aligned, the latencies of these
+      // instructions increases by one.
+      ++Latency;
+      break;
+    }
+
   return Latency;
 }
 
@@ -2226,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     }
   }
 
+  if (DefAlign < 8 && Subtarget.isCortexA9())
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::VLD1q8Pseudo:
+    case ARM::VLD1q16Pseudo:
+    case ARM::VLD1q32Pseudo:
+    case ARM::VLD1q64Pseudo:
+    case ARM::VLD1q8Pseudo_UPD:
+    case ARM::VLD1q16Pseudo_UPD:
+    case ARM::VLD1q32Pseudo_UPD:
+    case ARM::VLD1q64Pseudo_UPD:
+    case ARM::VLD2d8Pseudo:
+    case ARM::VLD2d16Pseudo:
+    case ARM::VLD2d32Pseudo:
+    case ARM::VLD2q8Pseudo:
+    case ARM::VLD2q16Pseudo:
+    case ARM::VLD2q32Pseudo:
+    case ARM::VLD2d8Pseudo_UPD:
+    case ARM::VLD2d16Pseudo_UPD:
+    case ARM::VLD2d32Pseudo_UPD:
+    case ARM::VLD2q8Pseudo_UPD:
+    case ARM::VLD2q16Pseudo_UPD:
+    case ARM::VLD2q32Pseudo_UPD:
+    case ARM::VLD3d8Pseudo:
+    case ARM::VLD3d16Pseudo:
+    case ARM::VLD3d32Pseudo:
+    case ARM::VLD1d64TPseudo:
+    case ARM::VLD3d8Pseudo_UPD:
+    case ARM::VLD3d16Pseudo_UPD:
+    case ARM::VLD3d32Pseudo_UPD:
+    case ARM::VLD1d64TPseudo_UPD:
+    case ARM::VLD3q8Pseudo_UPD:
+    case ARM::VLD3q16Pseudo_UPD:
+    case ARM::VLD3q32Pseudo_UPD:
+    case ARM::VLD3q8oddPseudo:
+    case ARM::VLD3q16oddPseudo:
+    case ARM::VLD3q32oddPseudo:
+    case ARM::VLD3q8oddPseudo_UPD:
+    case ARM::VLD3q16oddPseudo_UPD:
+    case ARM::VLD3q32oddPseudo_UPD:
+    case ARM::VLD4d8Pseudo:
+    case ARM::VLD4d16Pseudo:
+    case ARM::VLD4d32Pseudo:
+    case ARM::VLD1d64QPseudo:
+    case ARM::VLD4d8Pseudo_UPD:
+    case ARM::VLD4d16Pseudo_UPD:
+    case ARM::VLD4d32Pseudo_UPD:
+    case ARM::VLD1d64QPseudo_UPD:
+    case ARM::VLD4q8Pseudo_UPD:
+    case ARM::VLD4q16Pseudo_UPD:
+    case ARM::VLD4q32Pseudo_UPD:
+    case ARM::VLD4q8oddPseudo:
+    case ARM::VLD4q16oddPseudo:
+    case ARM::VLD4q32oddPseudo:
+    case ARM::VLD4q8oddPseudo_UPD:
+    case ARM::VLD4q16oddPseudo_UPD:
+    case ARM::VLD4q32oddPseudo_UPD:
+    case ARM::VLD1DUPq8Pseudo:
+    case ARM::VLD1DUPq16Pseudo:
+    case ARM::VLD1DUPq32Pseudo:
+    case ARM::VLD1DUPq8Pseudo_UPD:
+    case ARM::VLD1DUPq16Pseudo_UPD:
+    case ARM::VLD1DUPq32Pseudo_UPD:
+    case ARM::VLD2DUPd8Pseudo:
+    case ARM::VLD2DUPd16Pseudo:
+    case ARM::VLD2DUPd32Pseudo:
+    case ARM::VLD2DUPd8Pseudo_UPD:
+    case ARM::VLD2DUPd16Pseudo_UPD:
+    case ARM::VLD2DUPd32Pseudo_UPD:
+    case ARM::VLD4DUPd8Pseudo:
+    case ARM::VLD4DUPd16Pseudo:
+    case ARM::VLD4DUPd32Pseudo:
+    case ARM::VLD4DUPd8Pseudo_UPD:
+    case ARM::VLD4DUPd16Pseudo_UPD:
+    case ARM::VLD4DUPd32Pseudo_UPD:
+    case ARM::VLD1LNq8Pseudo:
+    case ARM::VLD1LNq16Pseudo:
+    case ARM::VLD1LNq32Pseudo:
+    case ARM::VLD1LNq8Pseudo_UPD:
+    case ARM::VLD1LNq16Pseudo_UPD:
+    case ARM::VLD1LNq32Pseudo_UPD:
+    case ARM::VLD2LNd8Pseudo:
+    case ARM::VLD2LNd16Pseudo:
+    case ARM::VLD2LNd32Pseudo:
+    case ARM::VLD2LNq16Pseudo:
+    case ARM::VLD2LNq32Pseudo:
+    case ARM::VLD2LNd8Pseudo_UPD:
+    case ARM::VLD2LNd16Pseudo_UPD:
+    case ARM::VLD2LNd32Pseudo_UPD:
+    case ARM::VLD2LNq16Pseudo_UPD:
+    case ARM::VLD2LNq32Pseudo_UPD:
+    case ARM::VLD4LNd8Pseudo:
+    case ARM::VLD4LNd16Pseudo:
+    case ARM::VLD4LNd32Pseudo:
+    case ARM::VLD4LNq16Pseudo:
+    case ARM::VLD4LNq32Pseudo:
+    case ARM::VLD4LNd8Pseudo_UPD:
+    case ARM::VLD4LNd16Pseudo_UPD:
+    case ARM::VLD4LNd32Pseudo_UPD:
+    case ARM::VLD4LNq16Pseudo_UPD:
+    case ARM::VLD4LNq32Pseudo_UPD:
+      // If the address is not 64-bit aligned, the latencies of these
+      // instructions increases by one.
+      ++Latency;
+      break;
+    }
+
   return Latency;
 }
 
@@ -2264,9 +2528,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   default:
     return ItinData->getStageLatency(get(Opcode).getSchedClass());
   case ARM::VLDMQIA:
-  case ARM::VLDMQDB:
   case ARM::VSTMQIA:
-  case ARM::VSTMQDB:
     return 2;
   }
 }
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7e2183d..9a2faf8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -34,25 +34,7 @@ namespace ARMII {
 
     //===------------------------------------------------------------------===//
     // This four-bit field describes the addressing mode used.
-
-    AddrModeMask  = 0x1f,
-    AddrModeNone    = 0,
-    AddrMode1       = 1,
-    AddrMode2       = 2,
-    AddrMode3       = 3,
-    AddrMode4       = 4,
-    AddrMode5       = 5,
-    AddrMode6       = 6,
-    AddrModeT1_1    = 7,
-    AddrModeT1_2    = 8,
-    AddrModeT1_4    = 9,
-    AddrModeT1_s    = 10, // i8 * 4 for pc and sp relative data
-    AddrModeT2_i12  = 11,
-    AddrModeT2_i8   = 12,
-    AddrModeT2_so   = 13,
-    AddrModeT2_pc   = 14, // +/- i12 for pc relative data
-    AddrModeT2_i8s4 = 15, // i8 * 4
-    AddrMode_i12    = 16,
+    AddrModeMask  = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
 
     // Size* - Flags to keep track of the size of an instruction.
     SizeShift     = 5,
@@ -64,11 +46,9 @@ namespace ARMII {
 
     // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
     // and store ops only.  Generic "updating" flag is used for ld/st multiple.
+    // The index mode enums are declared in ARMBaseInfo.h
     IndexModeShift = 8,
     IndexModeMask  = 3 << IndexModeShift,
-    IndexModePre   = 1,
-    IndexModePost  = 2,
-    IndexModeUpd   = 3,
 
     //===------------------------------------------------------------------===//
     // Instruction encoding formats.
@@ -311,7 +291,7 @@ public:
                                        int64_t &Offset1, int64_t &Offset2)const;
 
   /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-  /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
   /// be scheduled togther. On some targets if two loads are loading from
   /// addresses in the same cache line, it's better if they are scheduled
   /// together. This function takes two integers that represent the load offsets
@@ -327,7 +307,7 @@ public:
                                     const MachineFunction &MF) const;
 
   virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                   unsigned NumCyles, unsigned ExtraPredCycles,
+                                   unsigned NumCycles, unsigned ExtraPredCycles,
                                    float Prob, float Confidence) const;
 
   virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
@@ -337,10 +317,10 @@ public:
                                    float Probability, float Confidence) const;
 
   virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
-                                         unsigned NumCyles,
+                                         unsigned NumCycles,
                                          float Probability,
                                          float Confidence) const {
-    return NumCyles == 1;
+    return NumCycles == 1;
   }
 
   /// AnalyzeCompare - For a comparison instruction, return the source register
@@ -496,19 +476,19 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                              unsigned DestReg, unsigned BaseReg, int NumBytes,
                              ARMCC::CondCodes Pred, unsigned PredReg,
-                             const ARMBaseInstrInfo &TII);
+                             const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
 
 void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                             unsigned DestReg, unsigned BaseReg, int NumBytes,
                             ARMCC::CondCodes Pred, unsigned PredReg,
-                            const ARMBaseInstrInfo &TII);
+                            const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
 void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator &MBBI,
+                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                unsigned DestReg, unsigned BaseReg,
                                int NumBytes, const TargetInstrInfo &TII,
                                const ARMBaseRegisterInfo& MRI,
-                               DebugLoc dl);
+                               unsigned MIFlags = 0);
 
 
 /// rewriteARMFrameIndex / rewriteT2FrameIndex -
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 67a4b7d..ea1f08a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -88,7 +88,7 @@ BitVector ARMBaseRegisterInfo::
 getReservedRegs(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
-  // FIXME: avoid re-calculating this everytime.
+  // FIXME: avoid re-calculating this every time.
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
@@ -342,12 +342,51 @@ ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
   return false;
 }
 
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+                                                                         const {
+  const TargetRegisterClass *Super = RC;
+  TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+  do {
+    switch (Super->getID()) {
+    case ARM::GPRRegClassID:
+    case ARM::SPRRegClassID:
+    case ARM::DPRRegClassID:
+    case ARM::QPRRegClassID:
+    case ARM::QQPRRegClassID:
+    case ARM::QQQQPRRegClassID:
+      return Super;
+    }
+    Super = *I++;
+  } while (Super);
+  return RC;
+}
 
 const TargetRegisterClass *
 ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
   return ARM::GPRRegisterClass;
 }
 
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+                                         MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  switch (RC->getID()) {
+  default:
+    return 0;
+  case ARM::tGPRRegClassID:
+    return TFI->hasFP(MF) ? 4 : 5;
+  case ARM::GPRRegClassID: {
+    unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+    return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+  }
+  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
+  case ARM::DPRRegClassID:
+    return 32 - 10;
+  }
+}
+
 /// getAllocationOrder - Returns the register allocation order for a specified
 /// register class in the form of a pair of TargetRegisterClass iterators.
 std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
@@ -428,6 +467,10 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
     ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
   };
 
+  // We only support even/odd hints for GPR and rGPR.
+  if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+    return std::make_pair(RC->allocation_order_begin(MF),
+                          RC->allocation_order_end(MF));
 
   if (HintType == ARMRI::RegPairEven) {
     if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
@@ -530,6 +573,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
   }
 }
 
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+  // CortexA9 has a Write-after-write hazard for NEON registers.
+  if (!STI.isCortexA9())
+    return false;
+
+  switch (RC->getID()) {
+  case ARM::DPRRegClassID:
+  case ARM::DPR_8RegClassID:
+  case ARM::DPR_VFP2RegClassID:
+  case ARM::QPRRegClassID:
+  case ARM::QPR_8RegClassID:
+  case ARM::QPR_VFP2RegClassID:
+  case ARM::SPRRegClassID:
+  case ARM::SPR_8RegClassID:
+    // Avoid reusing S, D, and Q registers.
+    // Don't increase register pressure for QQ and QQQQ.
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -806,7 +872,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
                   DebugLoc dl,
                   unsigned DestReg, unsigned SubIdx, int Val,
                   ARMCC::CondCodes Pred,
-                  unsigned PredReg) const {
+                  unsigned PredReg, unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C =
@@ -816,7 +882,8 @@ emitLoadConstPool(MachineBasicBlock &MBB,
   BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
     .addReg(DestReg, getDefRegState(true), SubIdx)
     .addConstantPoolIndex(Idx)
-    .addImm(0).addImm(Pred).addReg(PredReg);
+    .addImm(0).addImm(Pred).addReg(PredReg)
+    .setMIFlags(MIFlags);
 }
 
 bool ARMBaseRegisterInfo::
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index ba6bd2b..9edf72d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -128,6 +128,12 @@ public:
 
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
 
+  const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                               MachineFunction &MF) const;
+
   std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
   getAllocationOrder(const TargetRegisterClass *RC,
                      unsigned HintType, unsigned HintReg,
@@ -139,6 +145,8 @@ public:
   void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
                           MachineFunction &MF) const;
 
+  virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
   bool hasBasePointer(const MachineFunction &MF) const;
 
   bool canRealignStack(const MachineFunction &MF) const;
@@ -176,7 +184,8 @@ public:
                                  unsigned DestReg, unsigned SubIdx,
                                  int Val,
                                  ARMCC::CondCodes Pred = ARMCC::AL,
-                                 unsigned PredReg = 0) const;
+                                 unsigned PredReg = 0,
+                                 unsigned MIFlags = MachineInstr::NoFlags)const;
 
   /// Code Generation virtual methods...
   virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 426ba13..d2981c0 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -22,6 +22,9 @@ class CCIfAlign<string Align, CCAction A>:
 //===----------------------------------------------------------------------===//
 def CC_ARM_APCS : CallingConv<[
 
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+    
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
   // Handle all vector types as either f64 or v2f64.
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 9bbf6a0..fa73716 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -312,6 +312,15 @@ namespace {
     unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
       const { return 0; }
 
+    unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
     /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
     /// machine operand requires relocation, record the relocation and return
     /// zero.
@@ -969,7 +978,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
 
 unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
                                          const TargetInstrDesc &TID) const {
-  for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
+  for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){
     const MachineOperand &MO = MI.getOperand(i-1);
     if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
       return 1 << ARMII::S_BitShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 13d1b33..baf95a3 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1650,24 +1650,27 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
     unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
     unsigned DestOffset = BBOffsets[DestBB->getNumber()];
     if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
-      MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
-      if (CmpMI->getOpcode() == ARM::tCMPi8) {
-        unsigned Reg = CmpMI->getOperand(0).getReg();
-        Pred = llvm::getInstrPredicate(CmpMI, PredReg);
-        if (Pred == ARMCC::AL &&
-            CmpMI->getOperand(1).getImm() == 0 &&
-            isARMLowRegister(Reg)) {
-          MachineBasicBlock *MBB = Br.MI->getParent();
-          MachineInstr *NewBR =
-            BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
-            .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
-          CmpMI->eraseFromParent();
-          Br.MI->eraseFromParent();
-          Br.MI = NewBR;
-          BBSizes[MBB->getNumber()] -= 2;
-          AdjustBBOffsetsAfter(MBB, -2);
-          ++NumCBZ;
-          MadeChange = true;
+      MachineBasicBlock::iterator CmpMI = Br.MI;
+      if (CmpMI != Br.MI->getParent()->begin()) {
+        --CmpMI;
+        if (CmpMI->getOpcode() == ARM::tCMPi8) {
+          unsigned Reg = CmpMI->getOperand(0).getReg();
+          Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+          if (Pred == ARMCC::AL &&
+              CmpMI->getOperand(1).getImm() == 0 &&
+              isARMLowRegister(Reg)) {
+            MachineBasicBlock *MBB = Br.MI->getParent();
+            MachineInstr *NewBR =
+              BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+              .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+            CmpMI->eraseFromParent();
+            Br.MI->eraseFromParent();
+            Br.MI = NewBR;
+            BBSizes[MBB->getNumber()] -= 2;
+            AdjustBBOffsetsAfter(MBB, -2);
+            ++NumCBZ;
+            MadeChange = true;
+          }
         }
       }
     }
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index bd753d2..b6b3c75 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -455,6 +455,10 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
   // Add an implicit def for the super-register.
   MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
   TransferImpOps(MI, MIB, MIB);
+
+  // Transfer memoperands.
+  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
   MI.eraseFromParent();
 }
 
@@ -496,10 +500,13 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
   MIB.addOperand(MI.getOperand(OpIdx++));
   MIB.addOperand(MI.getOperand(OpIdx++));
 
-  if (SrcIsKill)
-    // Add an implicit kill for the super-reg.
-    (*MIB).addRegisterKilled(SrcReg, TRI, true);
+  if (SrcIsKill) // Add an implicit kill for the super-reg.
+    MIB->addRegisterKilled(SrcReg, TRI, true);
   TransferImpOps(MI, MIB, MIB);
+
+  // Transfer memoperands.
+  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
   MI.eraseFromParent();
 }
 
@@ -622,9 +629,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
   MIB.addOperand(MI.getOperand(OpIdx++));
   MIB.addOperand(MI.getOperand(OpIdx++));
 
-  if (SrcIsKill)
-    // Add an implicit kill for the super-reg.
-    (*MIB).addRegisterKilled(SrcReg, TRI, true);
+  if (SrcIsKill)  // Add an implicit kill for the super-reg.
+    MIB->addRegisterKilled(SrcReg, TRI, true);
   TransferImpOps(MI, MIB, MIB);
   MI.eraseFromParent();
 }
@@ -655,8 +661,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
     unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
     LO16 = LO16.addImm(SOImmValV1);
     HI16 = HI16.addImm(SOImmValV2);
-    (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-    (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+    LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+    HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
     LO16.addImm(Pred).addReg(PredReg).addReg(0);
     HI16.addImm(Pred).addReg(PredReg).addReg(0);
     TransferImpOps(MI, LO16, HI16);
@@ -692,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
     HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
   }
 
-  (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-  (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
   LO16.addImm(Pred).addReg(PredReg);
   HI16.addImm(Pred).addReg(PredReg);
 
@@ -708,6 +714,78 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   switch (Opcode) {
     default:
       return false;
+    case ARM::VMOVScc:
+    case ARM::VMOVDcc: {
+      unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+              MI.getOperand(1).getReg())
+        .addReg(MI.getOperand(2).getReg(),
+                getKillRegState(MI.getOperand(2).isKill()))
+        .addImm(MI.getOperand(3).getImm()) // 'pred'
+        .addReg(MI.getOperand(4).getReg());
+
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::MOVCCr: {
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr),
+              MI.getOperand(1).getReg())
+        .addReg(MI.getOperand(2).getReg(),
+                getKillRegState(MI.getOperand(2).isKill()))
+        .addImm(MI.getOperand(3).getImm()) // 'pred'
+        .addReg(MI.getOperand(4).getReg())
+        .addReg(0); // 's' bit
+
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::MOVCCs: {
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+              (MI.getOperand(1).getReg()))
+        .addReg(MI.getOperand(2).getReg(),
+                getKillRegState(MI.getOperand(2).isKill()))
+        .addReg(MI.getOperand(3).getReg(),
+                getKillRegState(MI.getOperand(3).isKill()))
+        .addImm(MI.getOperand(4).getImm())
+        .addImm(MI.getOperand(5).getImm()) // 'pred'
+        .addReg(MI.getOperand(6).getReg())
+        .addReg(0); // 's' bit
+
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::MOVCCi16: {
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+              MI.getOperand(1).getReg())
+        .addImm(MI.getOperand(2).getImm())
+        .addImm(MI.getOperand(3).getImm()) // 'pred'
+        .addReg(MI.getOperand(4).getReg());
+
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::MOVCCi: {
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi),
+              MI.getOperand(1).getReg())
+        .addImm(MI.getOperand(2).getImm())
+        .addImm(MI.getOperand(3).getImm()) // 'pred'
+        .addReg(MI.getOperand(4).getReg())
+        .addReg(0); // 's' bit
+
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::MVNCCi: {
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+              MI.getOperand(1).getReg())
+        .addImm(MI.getOperand(2).getImm())
+        .addImm(MI.getOperand(3).getImm()) // 'pred'
+        .addReg(MI.getOperand(4).getReg())
+        .addReg(0); // 's' bit
+
+      MI.eraseFromParent();
+      return true;
+    }
     case ARM::Int_eh_sjlj_dispatchsetup: {
       MachineFunction &MF = *MI.getParent()->getParent();
       const ARMBaseInstrInfo *AII =
@@ -726,9 +804,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
           llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
                                        FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
         } else if (AFI->isThumbFunction()) {
-          llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6,
-                                          FramePtr, -NumBytes,
-                                          *TII, RI, MI.getDebugLoc());
+          llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                          FramePtr, -NumBytes, *TII, RI);
         } else {
           llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
                                         FramePtr, -NumBytes, ARMCC::AL, 0,
@@ -785,7 +862,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                 TII->get(ARM::BL))
         .addExternalSymbol("__aeabi_read_tp", 0);
 
-      (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       TransferImpOps(MI, MIB, MIB);
       MI.eraseFromParent();
       return true;
@@ -800,7 +877,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
         AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                TII->get(NewLdOpc), DstReg)
                        .addOperand(MI.getOperand(1)));
-      (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                          TII->get(ARM::tPICADD))
         .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -823,7 +900,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       const MachineOperand &MO1 = MI.getOperand(1);
       const GlobalValue *GV = MO1.getGlobal();
       unsigned TF = MO1.getTargetFlags();
-      bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+      bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn);
       bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
       unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
       unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
@@ -856,7 +933,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       if (isARM) {
         AddDefaultPred(MIB3);
         if (Opcode == ARM::MOV_ga_pcrel_ldr)
-          (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+          MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       }
       TransferImpOps(MI, MIB1, MIB3);
       MI.eraseFromParent();
@@ -896,9 +973,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       return true;
     }
 
-    case ARM::VLDMQIA:
-    case ARM::VLDMQDB: {
-      unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB;
+    case ARM::VLDMQIA: {
+      unsigned NewOpc = ARM::VLDMDIA;
       MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
       unsigned OpIdx = 0;
@@ -927,9 +1003,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       return true;
     }
 
-    case ARM::VSTMQIA:
-    case ARM::VSTMQDB: {
-      unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB;
+    case ARM::VSTMQIA: {
+      unsigned NewOpc = ARM::VSTMDIA;
       MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
       unsigned OpIdx = 0;
@@ -950,9 +1025,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
       MIB.addReg(D0).addReg(D1);
 
-      if (SrcIsKill)
-        // Add an implicit kill for the Q register.
-        (*MIB).addRegisterKilled(SrcReg, TRI, true);
+      if (SrcIsKill)      // Add an implicit kill for the Q register.
+        MIB->addRegisterKilled(SrcReg, TRI, true);
 
       TransferImpOps(MI, MIB, MIB);
       MI.eraseFromParent();
@@ -960,14 +1034,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     }
     case ARM::VDUPfqf:
     case ARM::VDUPfdf:{
-      unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
+      unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+        ARM::VDUPLN32d;
       MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
       unsigned OpIdx = 0;
       unsigned SrcReg = MI.getOperand(1).getReg();
       unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
       unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
-                                               Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+                            Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+                            &ARM::DPR_VFP2RegClass);
       // The lane is [0,1] for the containing DReg superregister.
       // Copy the dst/src register operands.
       MIB.addOperand(MI.getOperand(OpIdx++));
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26f48b3..3baf274 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
+#include "ARMAddressingModes.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMCallingConv.h"
 #include "ARMRegisterInfo.h"
@@ -26,6 +27,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
+#include "llvm/Operator.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -115,6 +117,11 @@ class ARMFastISel : public FastISel {
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill);
+    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+                                      const TargetRegisterClass *RC,
+                                      unsigned Op0, bool Op0IsKill,
+                                      unsigned Op1, bool Op1IsKill,
+                                      unsigned Op2, bool Op2IsKill);
     virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
@@ -123,14 +130,18 @@ class ARMFastISel : public FastISel {
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm);
-    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
-                                    const TargetRegisterClass *RC,
-                                    uint64_t Imm);
     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+    virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     uint64_t Imm1, uint64_t Imm2);
+
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
@@ -193,6 +204,7 @@ class ARMFastISel : public FastISel {
 
     // OptionalDef handling routines.
   private:
+    bool isARMNEONPred(const MachineInstr *MI);
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
     void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +233,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
   return true;
 }
 
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+       AFI->isThumb2Function())
+    return false;
+
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+    if (TID.OpInfo[i].isPredicate())
+      return true;
+
+  return false;
+}
+
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
 // TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,8 +257,10 @@ const MachineInstrBuilder &
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
 
-  // Do we use a predicate?
-  if (TII.isPredicable(MI))
+  // Do we use a predicate? or...
+  // Are we NEON in ARM mode and have a predicate operand? If so, I know
+  // we're not predicable but add it anyways.
+  if (TII.isPredicable(MI) || isARMNEONPred(MI))
     AddDefaultPred(MIB);
 
   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
@@ -296,6 +325,31 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+                                       const TargetRegisterClass *RC,
+                                       unsigned Op0, bool Op0IsKill,
+                                       unsigned Op1, bool Op1IsKill,
+                                       unsigned Op2, bool Op2IsKill) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+                   .addReg(Op0, Op0IsKill * RegState::Kill)
+                   .addReg(Op1, Op1IsKill * RegState::Kill)
+                   .addReg(Op2, Op2IsKill * RegState::Kill));
+  else {
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+                   .addReg(Op0, Op0IsKill * RegState::Kill)
+                   .addReg(Op1, Op1IsKill * RegState::Kill)
+                   .addReg(Op2, Op2IsKill * RegState::Kill));
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                           TII.get(TargetOpcode::COPY), ResultReg)
+                   .addReg(II.ImplicitDefs[0]));
+  }
+  return ResultReg;
+}
+
 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
@@ -384,6 +438,26 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+                                      const TargetRegisterClass *RC,
+                                      uint64_t Imm1, uint64_t Imm2) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  
+  if (II.getNumDefs() >= 1)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+                    .addImm(Imm1).addImm(Imm2));
+  else {
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+                    .addImm(Imm1).addImm(Imm2));
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
+                            TII.get(TargetOpcode::COPY),
+                            ResultReg)
+                    .addReg(II.ImplicitDefs[0]));
+  }
+  return ResultReg;
+}
+
 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                  unsigned Op0, bool Op0IsKill,
                                                  uint32_t Idx) {
@@ -667,24 +741,29 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
           TmpOffset += SL->getElementOffset(Idx);
         } else {
           uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
-          SmallVector<const Value *, 4> Worklist;
-          Worklist.push_back(Op);
-          do {
-            Op = Worklist.pop_back_val();
+          for (;;) {
             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
               // Constant-offset addressing.
               TmpOffset += CI->getSExtValue() * S;
-            } else if (isa<AddOperator>(Op) &&
-                       isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
-              // An add with a constant operand. Fold the constant.
+              break;
+            }
+            if (isa<AddOperator>(Op) &&
+                (!isa<Instruction>(Op) ||
+                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+                 == FuncInfo.MBB) &&
+                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+              // An add (in the same block) with a constant operand. Fold the
+              // constant.
               ConstantInt *CI =
-                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
               TmpOffset += CI->getSExtValue() * S;
-              // Add the other operand back to the work list.
-              Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
-            } else
-              goto unsupported_gep;
-          } while (!Worklist.empty());
+              // Iterate on the other operand.
+              Op = cast<AddOperator>(Op)->getOperand(0);
+              continue;
+            }
+            // Unsupported
+            goto unsupported_gep;
+          }
         }
       }
 
@@ -767,26 +846,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
   // Since the offset is too large for the load/store instruction
   // get the reg+offset into a register.
   if (needsLowering) {
-    ARMCC::CondCodes Pred = ARMCC::AL;
-    unsigned PredReg = 0;
-
-    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
-      ARM::GPRRegisterClass;
-    unsigned BaseReg = createResultReg(RC);
-
-    if (!isThumb)
-      emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                              BaseReg, Addr.Base.Reg, Addr.Offset,
-                              Pred, PredReg,
-                              static_cast<const ARMBaseInstrInfo&>(TII));
-    else {
-      assert(AFI->isThumb2Function());
-      emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                             BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
-                             static_cast<const ARMBaseInstrInfo&>(TII));
-    }
+    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
     Addr.Offset = 0;
-    Addr.Base.Reg = BaseReg;
   }
 }
 
@@ -797,7 +859,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
   if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
       VT.getSimpleVT().SimpleTy == MVT::f64)
     Addr.Offset /= 4;
-    
+
   // Frame base works a bit differently. Handle it separately.
   if (Addr.BaseType == Address::FrameIndexBase) {
     int FI = Addr.Base.FI;
@@ -819,7 +881,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
   } else {
     // Now add the rest of the operands.
     MIB.addReg(Addr.Base.Reg);
-  
+
     // ARM halfword load/stores need an additional operand.
     if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
 
@@ -1007,18 +1069,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
   // behavior.
   // TODO: Factor this out.
   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
-    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
-      MVT VT;
-      const Type *Ty = CI->getOperand(0)->getType();
-      if (!isTypeLegal(Ty, VT))
-        return false;
-
+    MVT SourceVT;
+    const Type *Ty = CI->getOperand(0)->getType();
+    if (CI->hasOneUse() && (CI->getParent() == I->getParent())
+        && isTypeLegal(Ty, SourceVT)) {
       bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
       if (isFloat && !Subtarget->hasVFP2())
         return false;
 
       unsigned CmpOpc;
-      switch (VT.SimpleTy) {
+      switch (SourceVT.SimpleTy) {
         default: return false;
         // TODO: Verify compares.
         case MVT::f32:
@@ -1033,7 +1093,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
       }
 
       // Get the compare predicate.
-      ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+      // Try to take advantage of fallthrough opportunities.
+      CmpInst::Predicate Predicate = CI->getPredicate();
+      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+        std::swap(TBB, FBB);
+        Predicate = CmpInst::getInversePredicate(Predicate);
+      }
+
+      ARMCC::CondCodes ARMPred = getComparePred(Predicate);
 
       // We may not handle every CC for now.
       if (ARMPred == ARMCC::AL) return false;
@@ -1061,19 +1128,55 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
       FuncInfo.MBB->addSuccessor(TBB);
       return true;
     }
+  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+    MVT SourceVT;
+    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+        (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+      unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+      unsigned OpReg = getRegForValue(TI->getOperand(0));
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(TstOpc))
+                      .addReg(OpReg).addImm(1));
+
+      unsigned CCMode = ARMCC::NE;
+      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+        std::swap(TBB, FBB);
+        CCMode = ARMCC::EQ;
+      }
+
+      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+      FastEmitBranch(FBB, DL);
+      FuncInfo.MBB->addSuccessor(TBB);
+      return true;
+    }
   }
 
   unsigned CmpReg = getRegForValue(BI->getCondition());
   if (CmpReg == 0) return false;
 
-  // Re-set the flags just in case.
-  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
-  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
-                  .addReg(CmpReg).addImm(0));
+  // We've been divorced from our compare!  Our block was split, and
+  // now our compare lives in a predecessor block.  We musn't
+  // re-compare here, as the children of the compare aren't guaranteed
+  // live across the block boundary (we *could* check for this).
+  // Regardless, the compare has been done in the predecessor block,
+  // and it left a value for us in a virtual register.  Ergo, we test
+  // the one-bit value left in the virtual register.
+  unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+                  .addReg(CmpReg).addImm(1));
+
+  unsigned CCMode = ARMCC::NE;
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    CCMode = ARMCC::EQ;
+  }
 
   unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
-                  .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
   FastEmitBranch(FBB, DL);
   FuncInfo.MBB->addSuccessor(TBB);
   return true;
@@ -1636,17 +1739,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
 
 unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
 
-  // Depend our opcode for thumb on whether or not we're targeting an
-  // externally callable function. For libcalls we'll just pass a NULL GV
-  // in here.
-  bool isExternal = false;
-  if (!GV || GV->hasExternalLinkage()) isExternal = true;
-  
   // Darwin needs the r9 versions of the opcodes.
   bool isDarwin = Subtarget->isTargetDarwin();
-  if (isThumb && isExternal) {
-    return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
-  } else if (isThumb) {
+  if (isThumb) {
     return isDarwin ? ARM::tBLr9 : ARM::tBL;
   } else  {
     return isDarwin ? ARM::BLr9 : ARM::BL;
@@ -1671,9 +1766,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   else if (!isTypeLegal(RetTy, RetVT))
     return false;
 
-  // For now we're using BLX etc on the assumption that we have v5t ops.
-  if (!Subtarget->hasV5TOps()) return false;
-
   // TODO: For now if we have long calls specified we don't handle the call.
   if (EnableARMLongCalls) return false;
 
@@ -1711,7 +1803,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
     return false;
 
-  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+  // Issue the call, BLr9 for darwin, BL otherwise.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
   unsigned CallOpc = ARMSelectCallOp(NULL);
@@ -1772,13 +1864,9 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   else if (!isTypeLegal(RetTy, RetVT))
     return false;
 
-  // For now we're using BLX etc on the assumption that we have v5t ops.
-  // TODO: Maybe?
-  if (!Subtarget->hasV5TOps()) return false;
-
   // TODO: For now if we have long calls specified we don't handle the call.
   if (EnableARMLongCalls) return false;
-  
+
   // Set up the argument vectors.
   SmallVector<Value*, 8> Args;
   SmallVector<unsigned, 8> ArgRegs;
@@ -1827,7 +1915,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
     return false;
 
-  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+  // Issue the call, BLr9 for darwin, BL otherwise.
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
   unsigned CallOpc = ARMSelectCallOp(GV);
@@ -1842,7 +1930,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc))
           .addGlobalAddress(GV, 0, 0));
-  
+
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
     MIB.addReg(RegArgs[i]);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 68c33f0..e2e95d4 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -106,14 +106,13 @@ static void
 emitSPUpdate(bool isARM,
              MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
              DebugLoc dl, const ARMBaseInstrInfo &TII,
-             int NumBytes,
-             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+             int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
   if (isARM)
     emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
-                            Pred, PredReg, TII);
+                            ARMCC::AL, 0, TII, MIFlags);
   else
     emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
-                           Pred, PredReg, TII);
+                           ARMCC::AL, 0, TII, MIFlags);
 }
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -141,11 +140,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
 
   // Allocate the vararg register save area. This is not counted in NumBytes.
   if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+                 MachineInstr::FrameSetup);
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+                   MachineInstr::FrameSetup);
     return;
   }
 
@@ -196,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
     unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
     MachineInstrBuilder MIB =
       BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
-      .addFrameIndex(FramePtrSpillFI).addImm(0);
+      .addFrameIndex(FramePtrSpillFI).addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
     AddDefaultCC(AddDefaultPred(MIB));
   }
 
@@ -226,7 +228,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   NumBytes = DPRCSOffset;
   if (NumBytes) {
     // Adjust SP after all the callee-save spills.
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+                 MachineInstr::FrameSetup);
     if (HasFP && isARM)
       // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
       // Note it's not safe to do this in Thumb2 mode because it would have
@@ -282,6 +285,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   // of the stack pointer is at this point. Any variable size objects
   // will be allocated after this, so we can still use the base pointer
   // to reference locals.
+  // FIXME: Clarify FrameSetup flags here.
   if (RegInfo->hasBasePointer(MF)) {
     if (isARM)
       BuildMI(MBB, MBBI, dl,
@@ -396,8 +400,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     // Jump to label or value in register.
     if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
       unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
-        ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
-        : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+        ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
+        : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
       if (JumpTarget.isGlobal())
         MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -408,10 +412,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                               JumpTarget.getTargetFlags());
       }
     } else if (RetOpcode == ARM::TCRETURNri) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+      BuildMI(MBB, MBBI, dl,
+              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
         addReg(JumpTarget.getReg(), RegState::Kill);
     } else if (RetOpcode == ARM::TCRETURNriND) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+      BuildMI(MBB, MBBI, dl,
+              TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
         addReg(JumpTarget.getReg(), RegState::Kill);
     }
 
@@ -439,8 +445,7 @@ ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
 
 int
 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
-                                             int FI,
-                                             unsigned &FrameReg,
+                                             int FI, unsigned &FrameReg,
                                              int SPAdj) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const ARMBaseRegisterInfo *RegInfo =
@@ -484,19 +489,23 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
       return FPOffset;
     } else if (MFI->hasVarSizedObjects()) {
       assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
-      // Try to use the frame pointer if we can, else use the base pointer
-      // since it's available. This is handy for the emergency spill slot, in
-      // particular.
       if (AFI->isThumb2Function()) {
+        // Try to use the frame pointer if we can, else use the base pointer
+        // since it's available. This is handy for the emergency spill slot, in
+        // particular.
         if (FPOffset >= -255 && FPOffset < 0) {
           FrameReg = RegInfo->getFrameRegister(MF);
           return FPOffset;
         }
-      } else
-        FrameReg = RegInfo->getBaseRegister();
+      }
     } else if (AFI->isThumb2Function()) {
+      // Use  add <rd>, sp, #<imm8> 
+      //      ldr <rd>, [sp, #<imm8>]
+      // if at all possible to save space.
+      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+        return Offset;
       // In Thumb2 mode, the negative offset is very limited. Try to avoid
-      // out of range references.
+      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
       if (FPOffset >= -255 && FPOffset < 0) {
         FrameReg = RegInfo->getFrameRegister(MF);
         return FPOffset;
@@ -524,7 +533,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                     const std::vector<CalleeSavedInfo> &CSI,
                                     unsigned StmOpc, unsigned StrOpc,
                                     bool NoGap,
-                                    bool(*Func)(unsigned, bool)) const {
+                                    bool(*Func)(unsigned, bool),
+                                    unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
 
@@ -567,14 +577,14 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     if (Regs.size() > 1 || StrOpc== 0) {
       MachineInstrBuilder MIB =
         AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
-                       .addReg(ARM::SP));
+                       .addReg(ARM::SP).setMIFlags(MIFlags));
       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
         MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
     } else if (Regs.size() == 1) {
       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                         ARM::SP)
         .addReg(Regs[0].first, getKillRegState(Regs[0].second))
-        .addReg(ARM::SP);
+        .addReg(ARM::SP).setMIFlags(MIFlags);
       // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
       // that refactoring is complete (eventually).
       if (StrOpc == ARM::STR_PRE) {
@@ -676,9 +686,12 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
   unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
   unsigned FltOpc = ARM::VSTMDDB_UPD;
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
-  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+               MachineInstr::FrameSetup);
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+               MachineInstr::FrameSetup);
+  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+               MachineInstr::FrameSetup);
 
   return true;
 }
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 1288b70..61bb8af 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -51,7 +51,8 @@ public:
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
   int getFrameIndexReference(const MachineFunction &MF, int FI,
                              unsigned &FrameReg) const;
-  int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+  int ResolveFrameIndexReference(const MachineFunction &MF,
+                                 int FI,
                                  unsigned &FrameReg, int SPAdj) const;
   int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
 
@@ -62,7 +63,8 @@ public:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
                     unsigned StrOpc, bool NoGap,
-                    bool(*Func)(unsigned, bool)) const;
+                    bool(*Func)(unsigned, bool),
+                    unsigned MIFlags = 0) const;
   void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
                    unsigned LdrOpc, bool isVarArg, bool NoGap,
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index e97ce50..517bba8 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -49,6 +49,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
       const TargetInstrDesc &LastTID = LastMI->getDesc();
       // Skip over one non-VFP / NEON instruction.
       if (!LastTID.isBarrier() &&
+          // On A9, AGU and NEON/FPU are muxed.
+          !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) &&
           (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
         MachineBasicBlock::iterator I = LastMI;
         if (I != LastMI->getParent()->begin()) {
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index f0d5a7d..abe5a31 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -45,7 +45,7 @@ DisableShifterOp("disable-shifter-op", cl::Hidden,
 static cl::opt<bool>
 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
   cl::desc("Check fp vmla / vmls hazard at isel time"),
-  cl::init(false));
+  cl::init(true));
 
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -91,9 +91,14 @@ public:
   bool isShifterOpProfitable(const SDValue &Shift,
                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
   bool SelectShifterOperandReg(SDValue N, SDValue &A,
-                               SDValue &B, SDValue &C);
+                               SDValue &B, SDValue &C,
+                               bool CheckProfitability = true);
   bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
-                                    SDValue &B, SDValue &C);
+                                    SDValue &B, SDValue &C) {
+    // Don't apply the profitability check
+    return SelectShifterOperandReg(N, A, B, C, false);
+  }
+
   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 
@@ -174,16 +179,6 @@ public:
     return ARM_AM::getT2SOImmVal(~Imm) != -1;
   }
 
-  inline bool Pred_so_imm(SDNode *inN) const {
-    ConstantSDNode *N = cast<ConstantSDNode>(inN);
-    return is_so_imm(N->getZExtValue());
-  }
-
-  inline bool Pred_t2_so_imm(SDNode *inN) const {
-    ConstantSDNode *N = cast<ConstantSDNode>(inN);
-    return is_t2_so_imm(N->getZExtValue());
-  }
-
   // Include the pieces autogenerated from the target description.
 #include "ARMGenDAGISel.inc"
 
@@ -373,7 +368,8 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
-                                              SDValue &Opc) {
+                                              SDValue &Opc,
+                                              bool CheckProfitability) {
   if (DisableShifterOp)
     return false;
 
@@ -390,7 +386,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
     ShImmVal = RHS->getZExtValue() & 31;
   } else {
     ShReg = N.getOperand(1);
-    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+    if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
       return false;
   }
   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
@@ -398,30 +394,6 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
-                                                   SDValue &BaseReg,
-                                                   SDValue &ShReg,
-                                                   SDValue &Opc) {
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  // Do not check isShifterOpProfitable. This must return true.
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    ShReg = CurDAG->getRegister(0, MVT::i32);
-    ShImmVal = RHS->getZExtValue() & 31;
-  } else {
-    ShReg = N.getOperand(1);
-  }
-  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
-                                  MVT::i32);
-  return true;
-}
-
 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                           SDValue &Base,
                                           SDValue &OffImm) {
@@ -437,7 +409,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
       return true;
     }
-    
+
     if (N.getOpcode() == ARMISD::Wrapper &&
         !(Subtarget->useMovt() &&
                      N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -1138,7 +1110,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
       return true;
     }
-    
+
     if (N.getOpcode() == ARMISD::Wrapper &&
                !(Subtarget->useMovt() &&
                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -1183,7 +1155,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
       !CurDAG->isBaseWithConstantOffset(N))
     return false;
-  
+
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     int RHSC = (int)RHS->getSExtValue();
     if (N.getOpcode() == ISD::SUB)
@@ -1571,6 +1543,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                  Ops.data(), Ops.size());
   }
 
+  // Transfer memoperands.
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
   if (NumVecs == 1)
     return VLd;
 
@@ -1600,6 +1577,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
     return NULL;
 
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
   SDValue Chain = N->getOperand(0);
   EVT VT = N->getOperand(Vec0Idx).getValueType();
   bool is64BitVector = VT.is64BitVector();
@@ -1672,7 +1652,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+    SDNode *VSt =
+      CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+    // Transfer memoperands.
+    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+    return VSt;
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
@@ -1693,6 +1679,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                         MemAddr.getValueType(),
                                         MVT::Other, OpsA, 7);
+  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
   Chain = SDValue(VStA, 1);
 
   // Store the odd D registers.
@@ -1709,8 +1696,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   Ops.push_back(Pred);
   Ops.push_back(Reg0);
   Ops.push_back(Chain);
-  return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
-                                Ops.data(), Ops.size());
+  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+                                        Ops.data(), Ops.size());
+  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+  return VStB;
 }
 
 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1726,6 +1715,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
     return NULL;
 
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
   SDValue Chain = N->getOperand(0);
   unsigned Lane =
     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
@@ -1812,6 +1804,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                   QOpcodes[OpcodeIndex]);
   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
                                          Ops.data(), Ops.size());
+  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
   if (!IsLoad)
     return VLdLn;
 
@@ -1838,6 +1831,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
     return NULL;
 
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
   SDValue Chain = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
@@ -1882,12 +1878,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
 
   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   std::vector<EVT> ResTys;
-  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
   if (isUpdating)
     ResTys.push_back(MVT::i32);
   ResTys.push_back(MVT::Other);
   SDNode *VLdDup =
     CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
   SuperReg = SDValue(VLdDup, 0);
 
   // Extract the subregisters.
@@ -2168,7 +2165,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
   // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
   // Pattern complexity = 6  cost = 11  size = 0
   //
-  // Also FCPYScc and FCPYDcc.
+  // Also VMOVScc and VMOVDcc.
   SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
   SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
   unsigned Opc = 0;
@@ -2450,34 +2447,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
   case ARMISD::CMOV:
     return SelectCMOVOp(N);
-  case ARMISD::CNEG: {
-    EVT VT = N->getValueType(0);
-    SDValue N0 = N->getOperand(0);
-    SDValue N1 = N->getOperand(1);
-    SDValue N2 = N->getOperand(2);
-    SDValue N3 = N->getOperand(3);
-    SDValue InFlag = N->getOperand(4);
-    assert(N2.getOpcode() == ISD::Constant);
-    assert(N3.getOpcode() == ISD::Register);
-
-    SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
-                               cast<ConstantSDNode>(N2)->getZExtValue()),
-                               MVT::i32);
-    SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
-    unsigned Opc = 0;
-    switch (VT.getSimpleVT().SimpleTy) {
-    default: assert(false && "Illegal conditional move type!");
-      break;
-    case MVT::f32:
-      Opc = ARM::VNEGScc;
-      break;
-    case MVT::f64:
-      Opc = ARM::VNEGDcc;
-      break;
-    }
-    return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
-  }
-
   case ARMISD::VZIP: {
     unsigned Opc = 0;
     EVT VT = N->getValueType(0);
@@ -2870,6 +2839,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     break;
   }
 
+  case ARMISD::VTBL1: {
+    DebugLoc dl = N->getDebugLoc();
+    EVT VT = N->getValueType(0);
+    SmallVector<SDValue, 6> Ops;
+
+    Ops.push_back(N->getOperand(0));
+    Ops.push_back(N->getOperand(1));
+    Ops.push_back(getAL(CurDAG));                    // Predicate
+    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+  }
+  case ARMISD::VTBL2: {
+    DebugLoc dl = N->getDebugLoc();
+    EVT VT = N->getValueType(0);
+
+    // Form a REG_SEQUENCE to force register allocation.
+    SDValue V0 = N->getOperand(0);
+    SDValue V1 = N->getOperand(1);
+    SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+    SmallVector<SDValue, 6> Ops;
+    Ops.push_back(RegSeq);
+    Ops.push_back(N->getOperand(2));
+    Ops.push_back(getAL(CurDAG));                    // Predicate
+    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+    return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+                                  Ops.data(), Ops.size());
+  }
+
   case ISD::CONCAT_VECTORS:
     return SelectConcatVector(N);
   }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ab9f9e1..0a31b87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,6 +72,11 @@ ARMInterworking("arm-interworking", cl::Hidden,
   cl::desc("Enable / disable ARM interworking (for debugging only)"),
   cl::init(true));
 
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+  ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                        EVT PromotedBitwiseVT) {
   if (VT != PromotedLdStVT) {
@@ -393,6 +398,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
   }
 
+  // Use divmod iOS compiler-rt calls.
+  if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+  }
+
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
   else
@@ -461,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+    // a destination type that is wider than the source.
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
 
     setTargetDAGCombine(ISD::INTRINSIC_VOID);
     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -502,18 +517,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   }
 
   // i64 operation support.
+  setOperationAction(ISD::MUL,     MVT::i64, Expand);
+  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
   if (Subtarget->isThumb1Only()) {
-    setOperationAction(ISD::MUL,     MVT::i64, Expand);
-    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
-    setOperationAction(ISD::MULHS,   MVT::i32, Expand);
     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-  } else {
-    setOperationAction(ISD::MUL,     MVT::i64, Expand);
-    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
-    if (!Subtarget->hasV6Ops())
-      setOperationAction(ISD::MULHS, MVT::i32, Expand);
   }
+  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops())
+    setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
@@ -597,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8,  Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8,  Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8,  Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8,  Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
     // Since the libcalls include locking, fold in the fences
     setShouldFoldAtomicFences(true);
   }
@@ -716,7 +740,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 // pressure of the register class's representative and all of it's super
 // classes' representatives transitively. We have not implemented this because
 // of the difficulty prior to coalescing of modeling operand register classes
-// due to the common occurence of cross class copies and subregister insertions
+// due to the common occurrence of cross class copies and subregister insertions
 // and extractions.
 std::pair<const TargetRegisterClass*, uint8_t>
 ARMTargetLowering::findRepresentativeClass(EVT VT) const{
@@ -778,7 +802,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
   case ARMISD::CMOV:          return "ARMISD::CMOV";
-  case ARMISD::CNEG:          return "ARMISD::CNEG";
 
   case ARMISD::RBIT:          return "ARMISD::RBIT";
 
@@ -853,6 +876,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VZIP:          return "ARMISD::VZIP";
   case ARMISD::VUZP:          return "ARMISD::VUZP";
   case ARMISD::VTRN:          return "ARMISD::VTRN";
+  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
+  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
@@ -861,6 +886,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::BFI:           return "ARMISD::BFI";
   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
+  case ARMISD::VBSL:          return "ARMISD::VBSL";
   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
@@ -946,27 +972,6 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   return Sched::RegPressure;
 }
 
-// FIXME: Move to RegInfo
-unsigned
-ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
-                                       MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
-  switch (RC->getID()) {
-  default:
-    return 0;
-  case ARM::tGPRRegClassID:
-    return TFI->hasFP(MF) ? 4 : 5;
-  case ARM::GPRRegClassID: {
-    unsigned FP = TFI->hasFP(MF) ? 1 : 0;
-    return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
-  }
-  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
-  case ARM::DPRRegClassID:
-    return 32 - 10;
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Lowering Code
 //===----------------------------------------------------------------------===//
@@ -1130,22 +1135,6 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   return Chain;
 }
 
-/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
-/// by "Src" to address "Dst" of size "Size".  Alignment information is
-/// specified by the specific parameter attribute.  The copy will be passed as
-/// a byval function parameter.
-/// Sometimes what we are copying is the end of a larger object, the part that
-/// does not fit in registers.
-static SDValue
-CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
-                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
-                          DebugLoc dl) {
-  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
-  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*isVolatile=*/false, /*AlwaysInline=*/false,
-                       MachinePointerInfo(0), MachinePointerInfo(0));
-}
-
 /// LowerMemOpCallTo - Store the argument to the stack.
 SDValue
 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
@@ -1156,9 +1145,6 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
   unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-  if (Flags.isByVal())
-    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-
   return DAG.getStore(Chain, dl, Arg, PtrOff,
                       MachinePointerInfo::getStack(LocMemOffset),
                       false, false, 0);
@@ -1224,6 +1210,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
+  CCInfo.setCallOrPrologue(Call);
   CCInfo.AnalyzeCallOperands(Outs,
                              CCAssignFnForNode(CallConv, /* Return*/ false,
                                                isVarArg));
@@ -1253,6 +1240,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     CCValAssign &VA = ArgLocs[i];
     SDValue Arg = OutVals[realArgIdx];
     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+    bool isByVal = Flags.isByVal();
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -1299,6 +1287,43 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       }
     } else if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else if (isByVal) {
+      assert(VA.isMemLoc());
+      unsigned offset = 0;
+
+      // True if this byval aggregate will be split between registers
+      // and memory.
+      if (CCInfo.isFirstByValRegValid()) {
+        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+        unsigned int i, j;
+        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+          SDValue Const = DAG.getConstant(4*i, MVT::i32);
+          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+                                     MachinePointerInfo(),
+                                     false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(j, Load));
+        }
+        offset = ARM::R4 - CCInfo.getFirstByValReg();
+        CCInfo.clearFirstByValReg();
+      }
+
+      unsigned LocMemOffset = VA.getLocMemOffset();
+      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+                                StkPtrOff);
+      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+                                         MVT::i32);
+      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+                                          Flags.getByValAlign(),
+                                          /*isVolatile=*/false,
+                                          /*AlwaysInline=*/false,
+                                          MachinePointerInfo(0),
+                                          MachinePointerInfo(0)));
+
     } else if (!IsSibCall) {
       assert(VA.isMemLoc());
 
@@ -1332,7 +1357,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // than necessary, because it means that each store effectively depends
     // on every argument instead of just those arguments it would clobber.
 
-    // Do not flag preceeding copytoreg stuff together with the following stuff.
+    // Do not flag preceding copytoreg stuff together with the following stuff.
     InFlag = SDValue();
     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
@@ -1492,6 +1517,35 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                          dl, DAG, InVals);
 }
 
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack.  Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to insure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+  assert((State->getCallOrPrologue() == Prologue ||
+          State->getCallOrPrologue() == Call) &&
+         "unhandled ParmContext");
+  if ((!State->isFirstByValRegValid()) &&
+      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+    State->setFirstByValReg(reg);
+    // At a call site, a byval parameter that is split between
+    // registers and memory needs its size truncated here.  In a
+    // function prologue, such byval parameters are reassembled in
+    // memory, and are not truncated.
+    if (State->getCallOrPrologue() == Call) {
+      unsigned excess = 4 * (ARM::R4 - reg);
+      assert(size >= excess && "expected larger existing stack allocation");
+      size -= excess;
+    }
+  }
+  // Confiscate any remaining parameter registers to preclude their
+  // assignment to subsequent parameters.
+  while (State->AllocateReg(GPRArgRegs, 4))
+    ;
+}
+
 /// MatchingStackOffset - Return true if the given stack call argument is
 /// already available in the same position (relatively) of the caller's
 /// incoming argument stack.
@@ -1813,6 +1867,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
   return HasRet;
 }
 
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!EnableARMTailCalls)
+    return false;
+
+  if (!CI->isTailCall())
+    return false;
+
+  return !Subtarget->isThumb1Only();
+}
+
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
 // one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -2096,7 +2160,7 @@ ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
   const {
   DebugLoc dl = Op.getDebugLoc();
   return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
-                     Op.getOperand(0), Op.getOperand(1));
+                     Op.getOperand(0));
 }
 
 SDValue
@@ -2151,6 +2215,13 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
     }
     return Result;
   }
+  case Intrinsic::arm_neon_vmulls:
+  case Intrinsic::arm_neon_vmullu: {
+    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+      ? ARMISD::VMULLs : ARMISD::VMULLu;
+    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+  }
   }
 }
 
@@ -2257,6 +2328,88 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
 }
 
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+                                  unsigned &VARegSize, unsigned &VARegSaveSize)
+  const {
+  unsigned NumGPRs;
+  if (CCInfo.isFirstByValRegValid())
+    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+  else {
+    unsigned int firstUnalloced;
+    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
+                                                sizeof(GPRArgRegs) /
+                                                sizeof(GPRArgRegs[0]));
+    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+  }
+
+  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+  VARegSize = NumGPRs * 4;
+  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usuall
+// byval).  Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+                                        DebugLoc dl, SDValue &Chain,
+                                        unsigned ArgOffset) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned firstRegToSaveIndex;
+  if (CCInfo.isFirstByValRegValid())
+    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+  else {
+    firstRegToSaveIndex = CCInfo.getFirstUnallocated
+      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+  }
+
+  unsigned VARegSize, VARegSaveSize;
+  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+  if (VARegSaveSize) {
+    // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by deferencing
+    // the result of va_next.
+    AFI->setVarArgsRegSaveSize(VARegSaveSize);
+    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+                                                     ArgOffset + VARegSaveSize
+                                                     - VARegSize,
+                                                     false));
+    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+                                    getPointerTy());
+
+    SmallVector<SDValue, 4> MemOps;
+    for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+      TargetRegisterClass *RC;
+      if (AFI->isThumb1OnlyFunction())
+        RC = ARM::tGPRRegisterClass;
+      else
+        RC = ARM::GPRRegisterClass;
+
+      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+      SDValue Store =
+        DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+                     false, false, 0);
+      MemOps.push_back(Store);
+      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                        DAG.getConstant(4, getPointerTy()));
+    }
+    if (!MemOps.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                          &MemOps[0], MemOps.size());
+  } else
+    // This will point to the next argument passed via stack.
+    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
+
 SDValue
 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                         CallingConv::ID CallConv, bool isVarArg,
@@ -2265,7 +2418,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                         DebugLoc dl, SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &InVals)
                                           const {
-
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
 
@@ -2275,12 +2427,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
+  CCInfo.setCallOrPrologue(Prologue);
   CCInfo.AnalyzeFormalArguments(Ins,
                                 CCAssignFnForNode(CallConv, /* Return*/ false,
                                                   isVarArg));
 
   SmallVector<SDValue, 16> ArgValues;
+  int lastInsIndex = -1;
 
+  SDValue ArgValue;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
 
@@ -2288,7 +2443,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
     if (VA.isRegLoc()) {
       EVT RegVT = VA.getLocVT();
 
-      SDValue ArgValue;
       if (VA.needsCustom()) {
         // f64 and vector types are split up into multiple registers or
         // combinations of registers and stack slots.
@@ -2364,67 +2518,45 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       assert(VA.isMemLoc());
       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
 
-      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
-      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
+      int index = ArgLocs[i].getValNo();
+
+      // Some Ins[] entries become multiple ArgLoc[] entries.
+      // Process them only once.
+      if (index != lastInsIndex)
+        {
+          ISD::ArgFlagsTy Flags = Ins[index].Flags;
+          // FIXME: For now, all byval parameter objects are marked mutable. 
+          // This can be changed with more analysis.
+          // In case of tail call optimization mark all arguments mutable.
+          // Since they could be overwritten by lowering of arguments in case of
+          // a tail call.
+          if (Flags.isByVal()) {
+            unsigned VARegSize, VARegSaveSize;
+            computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+            VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+            unsigned Bytes = Flags.getByValSize() - VARegSize;
+            if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+            int FI = MFI->CreateFixedObject(Bytes,
+                                            VA.getLocMemOffset(), false);
+            InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+          } else {
+            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+                                            VA.getLocMemOffset(), true);
 
-      // Create load nodes to retrieve arguments from the stack.
-      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
-                                   MachinePointerInfo::getFixedStack(FI),
-                                   false, false, 0));
+            // Create load nodes to retrieve arguments from the stack.
+            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                         MachinePointerInfo::getFixedStack(FI),
+                                         false, false, 0));
+          }
+          lastInsIndex = index;
+        }
     }
   }
 
   // varargs
-  if (isVarArg) {
-    static const unsigned GPRArgRegs[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3
-    };
-
-    unsigned NumGPRs = CCInfo.getFirstUnallocated
-      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
-
-    unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
-    unsigned VARegSize = (4 - NumGPRs) * 4;
-    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
-    unsigned ArgOffset = CCInfo.getNextStackOffset();
-    if (VARegSaveSize) {
-      // If this function is vararg, store any remaining integer argument regs
-      // to their spots on the stack so that they may be loaded by deferencing
-      // the result of va_next.
-      AFI->setVarArgsRegSaveSize(VARegSaveSize);
-      AFI->setVarArgsFrameIndex(
-        MFI->CreateFixedObject(VARegSaveSize,
-                               ArgOffset + VARegSaveSize - VARegSize,
-                               false));
-      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
-                                      getPointerTy());
-
-      SmallVector<SDValue, 4> MemOps;
-      for (; NumGPRs < 4; ++NumGPRs) {
-        TargetRegisterClass *RC;
-        if (AFI->isThumb1OnlyFunction())
-          RC = ARM::tGPRRegisterClass;
-        else
-          RC = ARM::GPRRegisterClass;
-
-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
-        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-        SDValue Store =
-          DAG.getStore(Val.getValue(1), dl, Val, FIN,
-               MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
-                       false, false, 0);
-        MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                          DAG.getConstant(4, getPointerTy()));
-      }
-      if (!MemOps.empty())
-        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                            &MemOps[0], MemOps.size());
-    } else
-      // This will point to the next argument passed via stack.
-      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
-  }
+  if (isVarArg)
+    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
 
   return Chain;
 }
@@ -2517,6 +2649,27 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
 }
 
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+  unsigned Opc = Cmp.getOpcode();
+  DebugLoc DL = Cmp.getDebugLoc();
+  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+  Cmp = Cmp.getOperand(0);
+  Opc = Cmp.getOpcode();
+  if (Opc == ARMISD::CMPFP)
+    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+  else {
+    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+  }
+  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cond = Op.getOperand(0);
   SDValue SelectTrue = Op.getOperand(1);
@@ -2552,7 +2705,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
         EVT VT = Cond.getValueType();
         SDValue ARMcc = Cond.getOperand(2);
         SDValue CCR = Cond.getOperand(3);
-        SDValue Cmp = Cond.getOperand(4);
+        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
       }
     }
@@ -2681,8 +2834,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
       // If one of the operand is zero, it's safe to ignore the NaN case since
       // we only care about equality comparisons.
       (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
-    // If unsafe fp math optimization is enabled and there are no othter uses of
-    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+    // If unsafe fp math optimization is enabled and there are no other uses of
+    // the CMP operands, and the condition code is EQ or NE, we can optimize it
     // to an integer comparison.
     if (CC == ISD::SETOEQ)
       CC = ISD::SETEQ;
@@ -2811,8 +2964,39 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 }
 
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  EVT OperandVT = Op.getOperand(0).getValueType();
+  assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+  if (VT != MVT::v4f32)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  unsigned CastOpc;
+  unsigned Opc;
+  switch (Op.getOpcode()) {
+  default:
+    assert(0 && "Invalid opcode!");
+  case ISD::SINT_TO_FP:
+    CastOpc = ISD::SIGN_EXTEND;
+    Opc = ISD::SINT_TO_FP;
+    break;
+  case ISD::UINT_TO_FP:
+    CastOpc = ISD::ZERO_EXTEND;
+    Opc = ISD::UINT_TO_FP;
+    break;
+  }
+
+  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+  return DAG.getNode(Opc, dl, VT, Op);
+}
+
 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return LowerVectorINT_TO_FP(Op, DAG);
+
   DebugLoc dl = Op.getDebugLoc();
   unsigned Opc;
 
@@ -2860,7 +3044,10 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                            DAG.getConstant(32, MVT::i32));
-    }
+    } else if (VT == MVT::f32)
+      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+                         DAG.getConstant(32, MVT::i32));
     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
 
@@ -2869,11 +3056,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
-                                              
+
     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
-    if (SrcVT == MVT::f32) {
+    if (VT == MVT::f32) {
       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                         DAG.getConstant(0, MVT::i32));
@@ -3508,6 +3695,13 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+  // range, then 0 is placed into the resulting vector. So pretty much any mask
+  // of 8 elements can work here.
+  return VT == MVT::v8i8 && M.size() == 8;
+}
+
 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -3947,6 +4141,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
           isVREVMask(M, VT, 32) ||
           isVREVMask(M, VT, 16) ||
           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+          isVTBLMask(M, VT) ||
           isVTRNMask(M, VT, WhichResult) ||
           isVUZPMask(M, VT, WhichResult) ||
           isVZIPMask(M, VT, WhichResult) ||
@@ -4024,6 +4219,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
   }
 }
 
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+                                       SmallVectorImpl<int> &ShuffleMask,
+                                       SelectionDAG &DAG) {
+  // Check to see if we can use the VTBL instruction.
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+
+  SmallVector<SDValue, 8> VTBLMask;
+  for (SmallVectorImpl<int>::iterator
+         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+  if (V2.getNode()->getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+                                   &VTBLMask[0], 8));
+
+  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+                                 &VTBLMask[0], 8));
+}
+
 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
@@ -4141,6 +4359,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   }
 
+  if (VT == MVT::v8i8) {
+    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+    if (NewOp.getNode())
+      return NewOp;
+  }
+
   return SDValue();
 }
 
@@ -4290,6 +4514,28 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
                      MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
 }
 
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+  unsigned Opcode = N->getOpcode();
+  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+    SDNode *N0 = N->getOperand(0).getNode();
+    SDNode *N1 = N->getOperand(1).getNode();
+    return N0->hasOneUse() && N1->hasOneUse() &&
+      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+  }
+  return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+  unsigned Opcode = N->getOpcode();
+  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+    SDNode *N0 = N->getOperand(0).getNode();
+    SDNode *N1 = N->getOperand(1).getNode();
+    return N0->hasOneUse() && N1->hasOneUse() &&
+      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+  }
+  return false;
+}
+
 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   // Multiplications are only custom-lowered for 128-bit vectors so that
   // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
@@ -4298,29 +4544,73 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   SDNode *N0 = Op.getOperand(0).getNode();
   SDNode *N1 = Op.getOperand(1).getNode();
   unsigned NewOpc = 0;
-  if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
+  bool isMLA = false;
+  bool isN0SExt = isSignExtended(N0, DAG);
+  bool isN1SExt = isSignExtended(N1, DAG);
+  if (isN0SExt && isN1SExt)
     NewOpc = ARMISD::VMULLs;
-  else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
-    NewOpc = ARMISD::VMULLu;
-  else if (VT == MVT::v2i64)
-    // Fall through to expand this.  It is not legal.
-    return SDValue();
-  else
-    // Other vector multiplications are legal.
-    return Op;
+  else {
+    bool isN0ZExt = isZeroExtended(N0, DAG);
+    bool isN1ZExt = isZeroExtended(N1, DAG);
+    if (isN0ZExt && isN1ZExt)
+      NewOpc = ARMISD::VMULLu;
+    else if (isN1SExt || isN1ZExt) {
+      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+      if (isN1SExt && isAddSubSExt(N0, DAG)) {
+        NewOpc = ARMISD::VMULLs;
+        isMLA = true;
+      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+        NewOpc = ARMISD::VMULLu;
+        isMLA = true;
+      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+        std::swap(N0, N1);
+        NewOpc = ARMISD::VMULLu;
+        isMLA = true;
+      }
+    }
+
+    if (!NewOpc) {
+      if (VT == MVT::v2i64)
+        // Fall through to expand this.  It is not legal.
+        return SDValue();
+      else
+        // Other vector multiplications are legal.
+        return Op;
+    }
+  }
 
   // Legalize to a VMULL instruction.
   DebugLoc DL = Op.getDebugLoc();
-  SDValue Op0 = SkipExtension(N0, DAG);
+  SDValue Op0;
   SDValue Op1 = SkipExtension(N1, DAG);
-
-  assert(Op0.getValueType().is64BitVector() &&
-         Op1.getValueType().is64BitVector() &&
-         "unexpected types for extended operands to VMULL");
-  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+  if (!isMLA) {
+    Op0 = SkipExtension(N0, DAG);
+    assert(Op0.getValueType().is64BitVector() &&
+           Op1.getValueType().is64BitVector() &&
+           "unexpected types for extended operands to VMULL");
+    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+  }
+
+  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
+  // isel lowering to take advantage of no-stall back to back vmul + vmla.
+  //   vmull q0, d4, d6
+  //   vmlal q0, d5, d6
+  // is faster than
+  //   vaddl q0, d4, d5
+  //   vmovl q1, d6
+  //   vmul  q0, q0, q1
+  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+  EVT Op1VT = Op1.getValueType();
+  return DAG.getNode(N0->getOpcode(), DL, VT,
+                     DAG.getNode(NewOpc, DL, VT,
+                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+                     DAG.getNode(NewOpc, DL, VT,
+                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
 }
 
-static SDValue 
+static SDValue
 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
   // Convert to float
   // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
@@ -4331,7 +4621,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
   Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
   // Get reciprocal estimate.
   // float4 recip = vrecpeq_f32(yf);
-  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
   // Because char has a smaller range than uchar, we can actually get away
   // without any newton steps.  This requires that we use a weird bias
@@ -4349,7 +4639,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
   return X;
 }
 
-static SDValue 
+static SDValue
 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
   SDValue N2;
   // Convert to float.
@@ -4359,13 +4649,13 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
   N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
   N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
-  
+
   // Use reciprocal estimate and one refinement step.
   // float4 recip = vrecpeq_f32(yf);
   // recip *= vrecpsq_f32(yf, recip);
-  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
-  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                    N1, N2);
   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
@@ -4395,15 +4685,15 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
   SDValue N0 = Op.getOperand(0);
   SDValue N1 = Op.getOperand(1);
   SDValue N2, N3;
-  
+
   if (VT == MVT::v8i8) {
     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
-    
+
     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                      DAG.getIntPtrConstant(4));
     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
-                     DAG.getIntPtrConstant(4)); 
+                     DAG.getIntPtrConstant(4));
     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                      DAG.getIntPtrConstant(0));
     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
@@ -4414,7 +4704,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
 
     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
     N0 = LowerCONCAT_VECTORS(N0, DAG);
-    
+
     N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
     return N0;
   }
@@ -4430,32 +4720,32 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   SDValue N0 = Op.getOperand(0);
   SDValue N1 = Op.getOperand(1);
   SDValue N2, N3;
-  
+
   if (VT == MVT::v8i8) {
     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
     N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
-    
+
     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                      DAG.getIntPtrConstant(4));
     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
-                     DAG.getIntPtrConstant(4)); 
+                     DAG.getIntPtrConstant(4));
     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                      DAG.getIntPtrConstant(0));
     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                      DAG.getIntPtrConstant(0));
-    
+
     N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
     N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
-    
+
     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
     N0 = LowerCONCAT_VECTORS(N0, DAG);
-    
-    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 
+
+    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
                      DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
                      N0);
     return N0;
   }
-  
+
   // v4i16 sdiv ... Convert to float.
   // float4 yf = vcvt_f32_s32(vmovl_u16(y));
   // float4 xf = vcvt_f32_s32(vmovl_u16(x));
@@ -4468,13 +4758,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   // float4 recip = vrecpeq_f32(yf);
   // recip *= vrecpsq_f32(yf, recip);
   // recip *= vrecpsq_f32(yf, recip);
-  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
-  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                    N1, N2);
   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
-  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                    N1, N2);
   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
@@ -4503,7 +4793,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GlobalAddress:
     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
       LowerGlobalAddressELF(Op, DAG);
-  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   case ISD::SELECT:        return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
@@ -4524,7 +4814,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
-  case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG);
+  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
@@ -4754,6 +5044,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+                                          MachineBasicBlock *BB,
+                                          unsigned Size,
+                                          bool signExtend,
+                                          ARMCC::CondCodes Cond) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned ptr = MI->getOperand(1).getReg();
+  unsigned incr = MI->getOperand(2).getReg();
+  unsigned oldval = dest;
+  DebugLoc dl = MI->getDebugLoc();
+
+  bool isThumb2 = Subtarget->isThumb2();
+  unsigned ldrOpc, strOpc, extendOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    extendOpc = 0;
+    break;
+  }
+
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+  unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  //  loopMBB:
+  //   ldrex dest, ptr
+  //   (sign extend dest, if required)
+  //   cmp dest, incr
+  //   cmov.cond scratch2, dest, incr
+  //   strex scratch, scratch2, ptr
+  //   cmp scratch, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  BB = loopMBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+  // Sign extend the value, if necessary.
+  if (signExtend && extendOpc) {
+    oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+  }
+
+  // Build compare and cmov instructions.
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                 .addReg(oldval).addReg(incr));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+         .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
 static
 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@@ -4763,6 +5156,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   llvm_unreachable("Expecting a BB with two successors!");
 }
 
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution maybe to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+  unsigned PseudoOpc;
+  unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+  {ARM::ADCSri, ARM::ADCri},
+  {ARM::ADCSrr, ARM::ADCrr},
+  {ARM::ADCSrs, ARM::ADCrs},
+  {ARM::SBCSri, ARM::SBCri},
+  {ARM::SBCSrr, ARM::SBCrr},
+  {ARM::SBCSrs, ARM::SBCrs},
+  {ARM::RSBSri, ARM::RSBri},
+  {ARM::RSBSrr, ARM::RSBrr},
+  {ARM::RSBSrs, ARM::RSBrs},
+  {ARM::RSCSri, ARM::RSCri},
+  {ARM::RSCSrs, ARM::RSCrs},
+  {ARM::t2ADCSri, ARM::t2ADCri},
+  {ARM::t2ADCSrr, ARM::t2ADCrr},
+  {ARM::t2ADCSrs, ARM::t2ADCrs},
+  {ARM::t2SBCSri, ARM::t2SBCri},
+  {ARM::t2SBCSrr, ARM::t2SBCrr},
+  {ARM::t2SBCSrs, ARM::t2SBCrs},
+  {ARM::t2RSBSri, ARM::t2RSBri},
+  {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert and Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target descrition as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
+                                             MachineBasicBlock *BB) const {
+  unsigned OldOpc = MI->getOpcode();
+  unsigned NewOpc = 0;
+
+  // This is only called for instructions that need remapping, so iterating over
+  // the tiny opcode table is not costly.
+  static const int NPairs =
+    sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+  for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
+         *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
+    if (OldOpc == Pair->PseudoOpc) {
+      NewOpc = Pair->MachineOpc;
+      break;
+    }
+  }
+  if (!NewOpc)
+    return false;
+
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+  for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+    MIB.addOperand(MI->getOperand(i));
+  AddDefaultPred(MIB);
+  MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+  MI->eraseFromParent();
+  return true;
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -4770,10 +5229,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   DebugLoc dl = MI->getDebugLoc();
   bool isThumb2 = Subtarget->isThumb2();
   switch (MI->getOpcode()) {
-  default:
+  default: {
+    if (RemapAddSubWithFlags(MI, BB))
+      return BB;
+
     MI->dump();
     llvm_unreachable("Unexpected instr type to insert");
-
+  }
   case ARM::ATOMIC_LOAD_ADD_I8:
      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
   case ARM::ATOMIC_LOAD_ADD_I16:
@@ -4816,6 +5278,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case ARM::ATOMIC_LOAD_SUB_I32:
      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
 
+  case ARM::ATOMIC_LOAD_MIN_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+  case ARM::ATOMIC_LOAD_MIN_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+  case ARM::ATOMIC_LOAD_MIN_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+  case ARM::ATOMIC_LOAD_MAX_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+  case ARM::ATOMIC_LOAD_MAX_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+  case ARM::ATOMIC_LOAD_MAX_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+  case ARM::ATOMIC_LOAD_UMIN_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+  case ARM::ATOMIC_LOAD_UMIN_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+  case ARM::ATOMIC_LOAD_UMIN_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+  case ARM::ATOMIC_LOAD_UMAX_I8:
+     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+  case ARM::ATOMIC_LOAD_UMAX_I16:
+     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+  case ARM::ATOMIC_LOAD_UMAX_I32:
+     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
@@ -5034,6 +5524,42 @@ static SDValue PerformSUBCombine(SDNode *N,
   return SDValue();
 }
 
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+///   vmul d3, d0, d2
+///   vmla d3, d1, d2
+/// is faster than
+///   vadd d3, d0, d1
+///   vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  if (!Subtarget->hasVMLxForwarding())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  unsigned Opcode = N0.getOpcode();
+  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+    Opcode = N0.getOpcode();
+    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+        Opcode != ISD::FADD && Opcode != ISD::FSUB)
+      return SDValue();
+    std::swap(N0, N1);
+  }
+
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  return DAG.getNode(Opcode, DL, VT,
+                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
 static SDValue PerformMULCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
@@ -5046,6 +5572,8 @@ static SDValue PerformMULCombine(SDNode *N,
     return SDValue();
 
   EVT VT = N->getValueType(0);
+  if (VT.is64BitVector() || VT.is128BitVector())
+    return PerformVMULCombine(N, DCI, Subtarget);
   if (VT != MVT::i32)
     return SDValue();
 
@@ -5088,12 +5616,16 @@ static SDValue PerformMULCombine(SDNode *N,
 
 static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
+
   // Attempt to use immediate-form VBIC
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
   SelectionDAG &DAG = DCI.DAG;
 
+  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
@@ -5127,6 +5659,9 @@ static SDValue PerformORCombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SelectionDAG &DAG = DCI.DAG;
 
+  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
@@ -5147,6 +5682,37 @@ static SDValue PerformORCombine(SDNode *N,
     }
   }
 
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+  SDValue N1 = N->getOperand(1);
+
+  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+    APInt SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+
+    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+    APInt SplatBits0;
+    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+                                  HasAnyUndefs) && !HasAnyUndefs) {
+      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+      APInt SplatBits1;
+      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+                                    HasAnyUndefs) && !HasAnyUndefs &&
+          SplatBits0 == ~SplatBits1) {
+        // Canonicalize the vector type to make instruction selection simpler.
+        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+                                     N0->getOperand(1), N0->getOperand(0),
+                                     N1->getOperand(0));
+        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+      }
+    }
+  }
+
   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
   // reasonable.
 
@@ -5154,19 +5720,16 @@ static SDValue PerformORCombine(SDNode *N,
   if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
     return SDValue();
 
-  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   DebugLoc DL = N->getDebugLoc();
   // 1) or (and A, mask), val => ARMbfi A, val, mask
   //      iff (val & mask) == val
   //
   // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
   //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
-  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+  //          && mask == ~mask2
   //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
-  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+  //          && ~mask == mask2
   //  (i.e., copy a bitfield value into another bitfield of the same width)
-  if (N0.getOpcode() != ISD::AND)
-    return SDValue();
 
   if (VT != MVT::i32)
     return SDValue();
@@ -5209,26 +5772,26 @@ static SDValue PerformORCombine(SDNode *N,
       return SDValue();
     unsigned Mask2 = N11C->getZExtValue();
 
+    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
+    // as is to match.
     if (ARM::isBitFieldInvertedMask(Mask) &&
-        ARM::isBitFieldInvertedMask(~Mask2) &&
-        (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+        (Mask == ~Mask2)) {
       // The pack halfword instruction works better for masks that fit it,
       // so use that when it's available.
       if (Subtarget->hasT2ExtractPack() &&
           (Mask == 0xffff || Mask == 0xffff0000))
         return SDValue();
       // 2a
-      unsigned lsb = CountTrailingZeros_32(Mask2);
+      unsigned amt = CountTrailingZeros_32(Mask2);
       Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
-                        DAG.getConstant(lsb, MVT::i32));
+                        DAG.getConstant(amt, MVT::i32));
       Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                         DAG.getConstant(Mask, MVT::i32));
       // Do not add new nodes to DAG combiner worklist.
       DCI.CombineTo(N, Res, false);
       return SDValue();
     } else if (ARM::isBitFieldInvertedMask(~Mask) &&
-               ARM::isBitFieldInvertedMask(Mask2) &&
-               (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+               (~Mask == Mask2)) {
       // The pack halfword instruction works better for masks that fit it,
       // so use that when it's available.
       if (Subtarget->hasT2ExtractPack() &&
@@ -5239,7 +5802,7 @@ static SDValue PerformORCombine(SDNode *N,
       Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                         DAG.getConstant(lsb, MVT::i32));
       Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
-                                DAG.getConstant(Mask2, MVT::i32));
+                        DAG.getConstant(Mask2, MVT::i32));
       // Do not add new nodes to DAG combiner worklist.
       DCI.CombineTo(N, Res, false);
       return SDValue();
@@ -5294,6 +5857,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
   SDValue InDouble = N->getOperand(0);
   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+  // vmovrrd(load f64) -> (load i32), (load i32)
+  SDNode *InNode = InDouble.getNode();
+  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+      InNode->getValueType(0) == MVT::f64 &&
+      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+      !cast<LoadSDNode>(InNode)->isVolatile()) {
+    // TODO: Should this be done for non-FrameIndex operands?
+    LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+    SelectionDAG &DAG = DCI.DAG;
+    DebugLoc DL = LD->getDebugLoc();
+    SDValue BasePtr = LD->getBasePtr();
+    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+                                 LD->getPointerInfo(), LD->isVolatile(),
+                                 LD->isNonTemporal(), LD->getAlignment());
+
+    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+                                    DAG.getConstant(4, MVT::i32));
+    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+                                 LD->getPointerInfo(), LD->isVolatile(),
+                                 LD->isNonTemporal(),
+                                 std::min(4U, LD->getAlignment() / 2));
+
+    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+    DCI.RemoveFromWorklist(LD);
+    DAG.DeleteNode(LD);
+    return Result;
+  }
+
   return SDValue();
 }
 
@@ -5323,8 +5917,28 @@ static SDValue PerformSTORECombine(SDNode *N,
   // Otherwise, the i64 value will be legalized to a pair of i32 values.
   StoreSDNode *St = cast<StoreSDNode>(N);
   SDValue StVal = St->getValue();
-  if (!ISD::isNormalStore(St) || St->isVolatile() ||
-      StVal.getValueType() != MVT::i64 ||
+  if (!ISD::isNormalStore(St) || St->isVolatile())
+    return SDValue();
+
+  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+      StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+    SelectionDAG  &DAG = DCI.DAG;
+    DebugLoc DL = St->getDebugLoc();
+    SDValue BasePtr = St->getBasePtr();
+    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+                                  StVal.getNode()->getOperand(0), BasePtr,
+                                  St->getPointerInfo(), St->isVolatile(),
+                                  St->isNonTemporal(), St->getAlignment());
+
+    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+                                    DAG.getConstant(4, MVT::i32));
+    return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+                        St->isNonTemporal(),
+                        std::min(4U, St->getAlignment() / 2));
+  }
+
+  if (StVal.getValueType() != MVT::i64 ||
       StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
     return SDValue();
 
@@ -5553,7 +6167,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
     EVT VecTy;
     if (isLoad)
       VecTy = N->getValueType(0);
-    else 
+    else
       VecTy = N->getOperand(AddrOpIdx+1).getValueType();
     unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
     if (isLaneOp)
@@ -5603,7 +6217,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
     DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
 
     break;
-  } 
+  }
   return SDValue();
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index dc400c4..a2e6260 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -57,7 +57,6 @@ namespace llvm {
       CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
       FMSTAT,       // ARM fmstat instruction.
       CMOV,         // ARM conditional move instructions.
-      CNEG,         // ARM conditional negate instructions.
 
       BCC_i64,
 
@@ -89,7 +88,7 @@ namespace llvm {
       MEMBARRIER_MCR, // Memory barrier (MCR)
 
       PRELOAD,      // Preload
-      
+
       VCEQ,         // Vector compare equal.
       VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
@@ -154,6 +153,8 @@ namespace llvm {
       VZIP,         // zip (interleave)
       VUZP,         // unzip (deinterleave)
       VTRN,         // transpose
+      VTBL1,        // 1-register shuffle with mask
+      VTBL2,        // 2-register shuffle with mask
 
       // Vector multiply long:
       VMULLs,       // ...signed
@@ -172,12 +173,15 @@ namespace llvm {
 
       // Bit-field insert
       BFI,
-      
+
       // Vector OR with immediate
       VORRIMM,
       // Vector AND with NOT of immediate
       VBICIMM,
 
+      // Vector bitwise select
+      VBSL,
+
       // Vector load N-element structure to all lanes:
       VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
       VLD3DUP,
@@ -330,9 +334,6 @@ namespace llvm {
 
     Sched::Preference getSchedulingPreference(SDNode *N) const;
 
-    unsigned getRegPressureLimit(const TargetRegisterClass *RC,
-                                 MachineFunction &MF) const;
-
     bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
@@ -407,7 +408,7 @@ namespace llvm {
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 
+    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
 
     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
@@ -425,6 +426,13 @@ namespace llvm {
                            DebugLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
 
+    void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+                              DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+      const;
+
+    void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+                        unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
     virtual SDValue
       LowerCall(SDValue Chain, SDValue Callee,
                 CallingConv::ID CallConv, bool isVarArg,
@@ -435,6 +443,9 @@ namespace llvm {
                 DebugLoc dl, SelectionDAG &DAG,
                 SmallVectorImpl<SDValue> &InVals) const;
 
+    /// HandleByVal - Target-specific cleanup for ByVal support.
+    virtual void HandleByVal(CCState *, unsigned &) const;
+
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
     /// optimization should implement this function.
@@ -456,10 +467,13 @@ namespace llvm {
 
     virtual bool isUsedByReturnOnly(SDNode *N) const;
 
+    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                       SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
     SDValue getVFPCmp(SDValue LHS, SDValue RHS,
                       SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
 
     SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
 
@@ -470,16 +484,22 @@ namespace llvm {
                                         MachineBasicBlock *BB,
                                         unsigned Size,
                                         unsigned BinOpcode) const;
+    MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+                                               MachineBasicBlock *BB,
+                                               unsigned Size,
+                                               bool signExtend,
+                                               ARMCC::CondCodes Cond) const;
 
+    bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
   };
-  
+
   enum NEONModImmType {
     VMOVModImm,
     VMVNModImm,
     OtherModImm
   };
-  
-  
+
+
   namespace ARM {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
   }
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 359ac45..f5fb98e 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -206,19 +206,30 @@ def setend_op : Operand<i32> {
   let PrintMethod = "printSetendOperand";
 }
 
-def cps_opt : Operand<i32> {
-  let PrintMethod = "printCPSOptionOperand";
-}
-
 def msr_mask : Operand<i32> {
   let PrintMethod = "printMSRMaskOperand";
   let ParserMatchClass = MSRMaskOperand;
 }
 
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-def neg_zero : Operand<i32> {
-  let PrintMethod = "printNegZeroOperand";
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+//    Offset    Encoding
+//     8        imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+//     16       imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+//     32       imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+//     64       64 - <imm> is encoded in imm6<5:0>
+def shr_imm8  : Operand<i32> {
+  let EncoderMethod = "getShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+  let EncoderMethod = "getShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+  let EncoderMethod = "getShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+  let EncoderMethod = "getShiftRight64Imm";
 }
 
 //===----------------------------------------------------------------------===//
@@ -279,6 +290,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
   let OutOperandList = oops;
   let InOperandList = iops;
   let Pattern = pattern;
+  let isCodeGenOnly = 1;
 }
 
 // PseudoInst that's ARM-mode only.
@@ -422,11 +434,11 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
       opc, asm, "", pattern> {
   bits<4> Rd;
   bits<4> Rt;
-  bits<4> Rn;
+  bits<4> addr;
   let Inst{27-23} = 0b00011;
   let Inst{22-21} = opcod;
   let Inst{20}    = 0;
-  let Inst{19-16} = Rn;
+  let Inst{19-16} = addr;
   let Inst{15-12} = Rd;
   let Inst{11-4}  = 0b11111001;
   let Inst{3-0}   = Rt;
@@ -513,6 +525,24 @@ class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
   let Inst{19-16} = Rn;
   let Inst{11-0} = offset{11-0};
 }
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM2 store w/ two operands: (GPR, am2offset)
+  // {17-14}  Rn
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<18> addr;
+  let Inst{25} = addr{13};
+  let Inst{23} = addr{12};
+  let Inst{19-16} = addr{17-14};
+  let Inst{11-0} = addr{11-0};
+}
 
 // addrmode3 instructions
 class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
@@ -547,6 +577,34 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
   let Inst{15-12} = Rt;           // Rt
   let Inst{7-4}   = op;
 }
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDSHT.
+class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+                  IndexMode im, Format f, InstrItinClass itin, string opc,
+                  string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+      opc, asm, cstr, pattern> {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{27-25} = 0b000;
+  let Inst{24}    = isPre;        // P bit
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{20}    = op20;         // L bit
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+  let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
+}
+
 class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
                 IndexMode im, Format f, InstrItinClass itin, string opc,
                 string asm, string cstr, list<dag> pattern>
@@ -619,12 +677,25 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
       opc, asm, cstr,pattern> {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
   let Inst{4}     = 1;
   let Inst{5}     = 1; // H bit
   let Inst{6}     = 0; // S bit
   let Inst{7}     = 1;
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{19-16} = addr{12-9};   // Rn
   let Inst{20}    = 0; // L bit
   let Inst{21}    = 0; // W bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{23}    = addr{8};      // U bit
   let Inst{24}    = 0; // P bit
   let Inst{27-25} = 0b000;
 }
@@ -1670,7 +1741,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 }
 
 // NEON 3 vector register format.
-class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
           dag oops, dag iops, Format f, InstrItinClass itin,
           string opc, string dt, string asm, string cstr, list<dag> pattern>
   : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
@@ -1680,6 +1752,13 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
   let Inst{11-8}  = op11_8;
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+          dag oops, dag iops, Format f, InstrItinClass itin,
+          string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+              oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
 
   // Instruction operands.
   bits<5> Vd;
@@ -1694,6 +1773,47 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
   let Inst{5}     = Vm{4};
 }
 
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+          dag oops, dag iops, Format f, InstrItinClass itin,
+          string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+              oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+  bit lane;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7}     = Vn{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+          dag oops, dag iops, Format f, InstrItinClass itin,
+          string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+              oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+  bits<2> lane;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7}     = Vn{4};
+  let Inst{2-0}   = Vm{2-0};
+  let Inst{5}     = lane{1};
+  let Inst{3}     = lane{0};
+}
+
 // Same as N3V except it doesn't have a data type suffix.
 class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
            bit op4,
@@ -1730,6 +1850,8 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
   let Inst{11-8}  = opcod2;
   let Inst{6-5}   = opcod3;
   let Inst{4}     = 1;
+  // A8.6.303, A8.6.328, A8.6.329
+  let Inst{3-0}   = 0b0000;
 
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 6e3fe2e..209c1a3 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,7 +58,7 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
 
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 0, []>;
 
 def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
@@ -93,8 +93,6 @@ def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTNone,
 
 def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
                               [SDNPInGlue]>;
-def ARMcneg          : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
-                              [SDNPInGlue]>;
 
 def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
                               [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -205,13 +203,13 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
 }]>;
 
 /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : PatLeaf<(i32 imm), [{
-  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+def imm1_15 : ImmLeaf<i32, [{
+  return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
 }]>;
 
 /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
-def imm16_31 : PatLeaf<(i32 imm), [{
-  return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+def imm16_31 : ImmLeaf<i32, [{
+  return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
 }]>;
 
 def so_imm_neg :
@@ -241,8 +239,8 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
 
 /// imm0_65535 predicate - True if the 32-bit immediate is in the range
 /// [0.65535].
-def imm0_65535 : PatLeaf<(i32 imm), [{
-  return (uint32_t)N->getZExtValue() < 65536;
+def imm0_65535 : ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 65536;
 }]>;
 
 class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
@@ -377,17 +375,23 @@ def neon_vcvt_imm32 : Operand<i32> {
 }
 
 // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
-def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
-    int32_t v = (int32_t)N->getZExtValue();
+def rot_imm : Operand<i32>, ImmLeaf<i32, [{
+    int32_t v = (int32_t)Imm;
     return v == 8 || v == 16 || v == 24; }]> {
   let EncoderMethod = "getRotImmOpValue";
 }
 
+def ShifterAsmOperand : AsmOperandClass {
+  let Name = "Shifter";
+  let SuperClasses = [];
+}
+
 // shift_imm: An integer that encodes a shift amount and the type of shift
 // (currently either asr or lsl) using the same encoding used for the
 // immediates in so_reg operands.
 def shift_imm : Operand<i32> {
   let PrintMethod = "printShiftImmOperand";
+  let ParserMatchClass = ShifterAsmOperand;
 }
 
 // shifter_operand operands: so_reg and so_imm.
@@ -396,19 +400,21 @@ def so_reg : Operand<i32>,    // reg reg imm
                             [shl,srl,sra,rotr]> {
   let EncoderMethod = "getSORegOpValue";
   let PrintMethod = "printSORegOperand";
-  let MIOperandInfo = (ops GPR, GPR, i32imm);
+  let MIOperandInfo = (ops GPR, GPR, shift_imm);
 }
 def shift_so_reg : Operand<i32>,    // reg reg imm
                    ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
                                   [shl,srl,sra,rotr]> {
   let EncoderMethod = "getSORegOpValue";
   let PrintMethod = "printSORegOperand";
-  let MIOperandInfo = (ops GPR, GPR, i32imm);
+  let MIOperandInfo = (ops GPR, GPR, shift_imm);
 }
 
 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits.
-def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  }]> {
   let EncoderMethod = "getSOImmOpValue";
   let PrintMethod = "printSOImmOperand";
 }
@@ -429,13 +435,13 @@ def arm_i32imm : PatLeaf<(imm), [{
 }]>;
 
 /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
-  return (int32_t)N->getZExtValue() < 32;
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 32;
 }]>;
 
 /// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
-def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
-  return (int32_t)N->getZExtValue() < 32;
+def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 32;
 }]> {
   let EncoderMethod = "getImmMinusOneOpValue";
 }
@@ -458,19 +464,30 @@ def bf_inv_mask_imm : Operand<i32>,
 }
 
 /// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
-def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
-  return isInt<5>(N->getSExtValue());
+def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
+  return isInt<5>(Imm);
 }]>;
 
 /// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
-def width_imm : Operand<i32>, PatLeaf<(imm), [{
-  return N->getSExtValue() > 0 &&  N->getSExtValue() <= 32;
+def width_imm : Operand<i32>, ImmLeaf<i32, [{
+  return Imm > 0 &&  Imm <= 32;
 }] > {
   let EncoderMethod = "getMsbOpValue";
 }
 
 // Define ARM specific addressing modes.
 
+def MemMode2AsmOperand : AsmOperandClass {
+  let Name = "MemMode2";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMemMode2Operand";
+}
+
+def MemMode3AsmOperand : AsmOperandClass {
+  let Name = "MemMode3";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMemMode3Operand";
+}
 
 // addrmode_imm12 := reg +/- imm12
 //
@@ -501,6 +518,7 @@ def addrmode2 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode2", []> {
   let EncoderMethod = "getAddrMode2OpValue";
   let PrintMethod = "printAddrMode2Operand";
+  let ParserMatchClass = MemMode2AsmOperand;
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
@@ -519,6 +537,7 @@ def addrmode3 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode3", []> {
   let EncoderMethod = "getAddrMode3OpValue";
   let PrintMethod = "printAddrMode3Operand";
+  let ParserMatchClass = MemMode3AsmOperand;
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
@@ -586,6 +605,21 @@ def addrmodepc : Operand<i32>,
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
+def MemMode7AsmOperand : AsmOperandClass {
+  let Name = "MemMode7";
+  let SuperClasses = [];
+}
+
+// addrmode7 := reg
+// Used by load/store exclusive instructions. Useful to enable right assembly
+// parsing and printing. Not used for any codegen matching.
+//
+def addrmode7 : Operand<i32> {
+  let PrintMethod = "printAddrMode7Operand";
+  let MIOperandInfo = (ops GPR);
+  let ParserMatchClass = MemMode7AsmOperand;
+}
+
 def nohash_imm : Operand<i32> {
   let PrintMethod = "printNoHashImmediate";
 }
@@ -902,52 +936,23 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
     let Inst{19-16} = Rn;
   }
 }
+}
+
 // Carry setting variants
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
-                             bit Commutable = 0> {
-  def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-                DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
-               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
-               Requires<[IsARM]> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<12> imm;
-    let Inst{15-12} = Rd;
-    let Inst{19-16} = Rn;
-    let Inst{11-0} = imm;
-    let Inst{20} = 1;
-    let Inst{25} = 1;
-  }
-  def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
-                DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
-               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
-               Requires<[IsARM]> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<4> Rm;
-    let Inst{11-4} = 0b00000000;
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+  def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+               Size4Bytes, IIC_iALUi,
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
+  def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+               Size4Bytes, IIC_iALUr,
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
     let isCommutable = Commutable;
-    let Inst{3-0} = Rm;
-    let Inst{15-12} = Rd;
-    let Inst{19-16} = Rn;
-    let Inst{20} = 1;
-    let Inst{25} = 0;
-  }
-  def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
-               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
-               Requires<[IsARM]> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<12> shift;
-    let Inst{11-0} = shift;
-    let Inst{15-12} = Rd;
-    let Inst{19-16} = Rn;
-    let Inst{20} = 1;
-    let Inst{25} = 0;
   }
-}
+  def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+               Size4Bytes, IIC_iALUsr,
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
 }
 }
 
@@ -972,6 +977,7 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
                  [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
     bits<4>  Rt;
     bits<17> shift;
+    let shift{4}    = 0;            // Inst{4} = 0
     let Inst{23}    = shift{12};    // U (add = ('U' == 1))
     let Inst{19-16} = shift{16-13}; // Rn
     let Inst{15-12} = Rt;
@@ -1001,6 +1007,7 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
                  [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
     bits<4> Rt;
     bits<17> shift;
+    let shift{4}    = 0;            // Inst{4} = 0
     let Inst{23}    = shift{12};    // U (add = ('U' == 1))
     let Inst{19-16} = shift{16-13}; // Rn
     let Inst{15-12} = Rt;
@@ -1249,7 +1256,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
 // The 'adr' mnemonic encodes differently if the label is before or after
 // the instruction. The {24-21} opcode bits are set by the fixup, as we don't
 // know until then which form of the instruction will be used.
-def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
                  MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
   bits<4> Rd;
   bits<12> label;
@@ -1311,6 +1318,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 // before calls from potentially appearing dead.
 let isCall = 1,
   // On non-Darwin platforms R9 is callee-saved.
+  // FIXME:  Do we really need a non-predicated version? If so, it should
+  // at least be a pseudo instruction expanding to the predicated version
+  // at MC lowering time.
   Defs = [R0,  R1,  R2,  R3,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
@@ -1340,7 +1350,16 @@ let isCall = 1,
             Requires<[IsARM, HasV5T, IsNotDarwin]> {
     bits<4> func;
     let Inst{31-4} = 0b1110000100101111111111110011;
-    let Inst{3-0}   = func;
+    let Inst{3-0}  = func;
+  }
+
+  def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+                    IIC_Br, "blx", "\t$func",
+                    [(ARMcall_pred GPR:$func)]>,
+                 Requires<[IsARM, HasV5T, IsNotDarwin]> {
+    bits<4> func;
+    let Inst{27-4} = 0b000100101111111111110011;
+    let Inst{3-0}  = func;
   }
 
   // ARMv4T
@@ -1364,30 +1383,25 @@ let isCall = 1,
           D16, D17, D18, D19, D20, D21, D22, D23,
           D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
   Uses = [R7, SP] in {
-  def BLr9  : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
-                IIC_Br, "bl\t$func",
-                [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
-    let Inst{31-28} = 0b1110;
-    bits<24> func;
-    let Inst{23-0} = func;
-  }
+  def BLr9  : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops),
+                Size4Bytes, IIC_Br,
+                [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
 
-  def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
-                   IIC_Br, "bl", "\t$func",
+  def BLr9_pred : ARMPseudoInst<(outs),
+                   (ins bltarget:$func, pred:$p, variable_ops),
+                   Size4Bytes, IIC_Br,
                    [(ARMcall_pred tglobaladdr:$func)]>,
-                  Requires<[IsARM, IsDarwin]> {
-    bits<24> func;
-    let Inst{23-0} = func;
-  }
+                  Requires<[IsARM, IsDarwin]>;
 
   // ARMv5T and above
-  def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
-                IIC_Br, "blx\t$func",
-                [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
-    bits<4> func;
-    let Inst{31-4} = 0b1110000100101111111111110011;
-    let Inst{3-0}   = func;
-  }
+  def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops),
+                Size4Bytes, IIC_Br,
+                [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>;
+
+  def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p,  variable_ops),
+                      Size4Bytes, IIC_Br,
+                      [(ARMcall_pred GPR:$func)]>,
+                   Requires<[IsARM, HasV5T, IsDarwin]>;
 
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
@@ -1403,11 +1417,7 @@ let isCall = 1,
 
 // Tail calls.
 
-// FIXME: These should probably be xformed into the non-TC versions of the
-// instructions as part of MC lowering.
-// FIXME: These seem to be used for both Thumb and ARM instruction selection.
-// Thumb should have its own version since the instruction is actually
-// different, even though the mnemonic is the same.
+// FIXME: The Thumb versions of these should live in ARMInstrThumb.td
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
   // Darwin versions.
   let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1421,21 +1431,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
     def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
                        IIC_Br, []>, Requires<[IsDarwin]>;
 
-    def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
-                   IIC_Br, "b\t$dst  @ TAILCALL",
+    def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+                   Size4Bytes, IIC_Br,
                    []>, Requires<[IsARM, IsDarwin]>;
 
-    def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
-                   IIC_Br, "b.w\t$dst  @ TAILCALL",
+    def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+                   Size4Bytes, IIC_Br,
                    []>, Requires<[IsThumb, IsDarwin]>;
 
-    def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
-                     BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
-                   []>, Requires<[IsDarwin]> {
-      bits<4> dst;
-      let Inst{31-4} = 0b1110000100101111111111110001;
-      let Inst{3-0}  = dst;
-    }
+    def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                     Size4Bytes, IIC_Br,
+                   []>, Requires<[IsARM, IsDarwin]>;
+
+    def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                     Size4Bytes, IIC_Br,
+                   []>, Requires<[IsThumb, IsDarwin]>;
   }
 
   // Non-Darwin versions (the difference is R9).
@@ -1450,34 +1460,31 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
     def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
                        IIC_Br, []>, Requires<[IsNotDarwin]>;
 
-    def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
-                   IIC_Br, "b\t$dst  @ TAILCALL",
+    def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+                   Size4Bytes, IIC_Br,
                    []>, Requires<[IsARM, IsNotDarwin]>;
 
-    def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
-                   IIC_Br, "b.w\t$dst  @ TAILCALL",
+    def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+                   Size4Bytes, IIC_Br,
                    []>, Requires<[IsThumb, IsNotDarwin]>;
 
-    def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
-                     BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
-                   []>, Requires<[IsNotDarwin]> {
-      bits<4> dst;
-      let Inst{31-4} = 0b1110000100101111111111110001;
-      let Inst{3-0}  = dst;
-    }
+    def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                     Size4Bytes, IIC_Br,
+                   []>, Requires<[IsARM, IsNotDarwin]>;
+    def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                     Size4Bytes, IIC_Br,
+                   []>, Requires<[IsThumb, IsNotDarwin]>;
   }
 }
 
 let isBranch = 1, isTerminator = 1 in {
-  // B is "predicable" since it can be xformed into a Bcc.
+  // B is "predicable" since it's just a Bcc with an 'always' condition.
   let isBarrier = 1 in {
     let isPredicable = 1 in
-    def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
-                "b\t$target", [(br bb:$target)]> {
-      bits<24> target;
-      let Inst{31-28} = 0b1110;
-      let Inst{23-0} = target;
-    }
+    // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+    // should be sufficient.
+    def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br,
+                [(br bb:$target)]>;
 
     let isNotDuplicable = 1, isIndirectBranch = 1 in {
     def BR_JTr : ARMPseudoInst<(outs),
@@ -1509,6 +1516,16 @@ let isBranch = 1, isTerminator = 1 in {
   }
 }
 
+// BLX (immediate) -- for disassembly only
+def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
+               "blx\t$target", [/* pattern left blank */]>,
+           Requires<[IsARM, HasV5T]> {
+  let Inst{31-25} = 0b1111101;
+  bits<25> target;
+  let Inst{23-0} = target{24-1};
+  let Inst{24} = target{0};
+}
+
 // Branch and Exchange Jazelle -- for disassembly only
 def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
               [/* For disassembly only; pattern left blank */]> {
@@ -1533,6 +1550,7 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
   let Inst{23-0} = svc;
 }
 }
+def : MnemonicAlias<"swi", "svc">;
 
 // Store Return State is a system instruction -- for disassembly only
 let isCodeGenOnly = 1 in {  // FIXME: This should not use submode!
@@ -1541,6 +1559,8 @@ def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b110; // W = 1
+  let Inst{19-8} = 0xd05;
+  let Inst{7-5} = 0b000;
 }
 
 def SRS  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
@@ -1548,6 +1568,8 @@ def SRS  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b100; // W = 0
+  let Inst{19-8} = 0xd05;
+  let Inst{7-5} = 0b000;
 }
 
 // Return From Exception is a system instruction -- for disassembly only
@@ -1556,6 +1578,7 @@ def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b011; // W = 1
+  let Inst{15-0} = 0x0a00;
 }
 
 def RFE  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
@@ -1563,6 +1586,7 @@ def RFE  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b001; // W = 0
+  let Inst{15-0} = 0x0a00;
 }
 } // isCodeGenOnly = 1
 
@@ -1610,15 +1634,11 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
                    IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
                    [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
-    isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring?
-// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
-//        how to represent that such that tblgen is happy and we don't
-//        mark this codegen only?
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
 def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
                  (ins addrmode3:$addr), LdMiscFrm,
-                 IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
+                 IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
                  []>, Requires<[IsARM, HasV5TE]>;
 }
 
@@ -1636,6 +1656,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
     let Inst{23} = addr{12};
     let Inst{19-16} = addr{17-14};
     let Inst{11-0} = addr{11-0};
+    let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
   }
   def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                       (ins GPR:$Rn, am2offset:$offset),
@@ -1688,40 +1709,80 @@ let mayLoad = 1, neverHasSideEffects = 1 in {
 defm LDRH  : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
 defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
 defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
-let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-defm LDRD :  AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+                          (ins addrmode3:$addr), IndexModePre,
+                          LdMiscFrm, IIC_iLoad_d_ru,
+                          "ldrd", "\t$Rt, $Rt2, $addr!",
+                          "$addr.base = $Rn_wb", []> {
+  bits<14> addr;
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+                          (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+                          LdMiscFrm, IIC_iLoad_d_ru,
+                          "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
+                          "$Rn = $Rn_wb", []> {
+  bits<10> offset;
+  bits<4> Rn;
+  let Inst{23}    = offset{8};      // U bit
+  let Inst{22}    = offset{9};      // 1 == imm8, 0 == Rm
+  let Inst{19-16} = Rn;
+  let Inst{11-8}  = offset{7-4};    // imm7_4/zero
+  let Inst{3-0}   = offset{3-0};    // imm3_0/Rm
+}
+} // hasExtraDefRegAllocReq = 1
 } // mayLoad = 1, neverHasSideEffects = 1
 
 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
 let mayLoad = 1, neverHasSideEffects = 1 in {
-def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
-                   (ins GPR:$base, am2offset:$offset), IndexModeNone,
-                   LdFrm, IIC_iLoad_ru,
-                   "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
+                   (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
+                   "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+  // {17-14}  Rn
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<18> addr;
+  let Inst{25} = addr{13};
+  let Inst{23} = addr{12};
   let Inst{21} = 1; // overwrite
-}
-def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base, am2offset:$offset), IndexModeNone,
-                  LdFrm, IIC_iLoad_bh_ru,
-                  "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+  let Inst{19-16} = addr{17-14};
+  let Inst{11-0} = addr{11-0};
+  let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+                  (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+                  "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+  // {17-14}  Rn
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<18> addr;
+  let Inst{25} = addr{13};
+  let Inst{23} = addr{12};
   let Inst{21} = 1; // overwrite
+  let Inst{19-16} = addr{17-14};
+  let Inst{11-0} = addr{11-0};
+  let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
 }
-def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base, am3offset:$offset), IndexModePost,
-                 LdMiscFrm, IIC_iLoad_bh_ru,
-                 "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+             (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+             "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base, am3offset:$offset), IndexModePost,
-                 LdMiscFrm, IIC_iLoad_bh_ru,
-                 "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRHT  : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+             (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+             "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base, am3offset:$offset), IndexModePost,
-                 LdMiscFrm, IIC_iLoad_bh_ru,
-                 "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+             (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+             "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
 }
@@ -1734,55 +1795,61 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
                [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
 
 // Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
-    isCodeGenOnly = 1 in  // $src2 doesn't exist in asm string
-def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
                StMiscFrm, IIC_iStore_d_r,
-               "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+               "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
 
 // Indexed stores
 def STR_PRE  : AI2stridx<0, 1, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
                      IndexModePre, StFrm, IIC_iStore_ru,
-                     "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     "str", "\t$Rt, [$Rn, $offset]!",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb,
                       (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
 
 def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
                      IndexModePost, StFrm, IIC_iStore_ru,
-                     "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     "str", "\t$Rt, [$Rn], $offset",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb,
                       (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
 
 def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
                      IndexModePre, StFrm, IIC_iStore_bh_ru,
-                     "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     "strb", "\t$Rt, [$Rn, $offset]!",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
                                         GPR:$Rn, am2offset:$offset))]>;
 def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
                      IndexModePost, StFrm, IIC_iStore_bh_ru,
-                     "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     "strb", "\t$Rt, [$Rn], $offset",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
                                         GPR:$Rn, am2offset:$offset))]>;
 
 def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
                      IndexModePre, StMiscFrm, IIC_iStore_ru,
-                     "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     "strh", "\t$Rt, [$Rn, $offset]!",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb,
                       (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
 
 def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
                      (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
                      IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
-                     "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     "strh", "\t$Rt, [$Rn], $offset",
+                     "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
                      [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
                                         GPR:$Rn, am3offset:$offset))]>;
 
 // For disassembly only
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
                      (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
                      StMiscFrm, IIC_iStore_d_ru,
@@ -1795,31 +1862,32 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
                      StMiscFrm, IIC_iStore_d_ru,
                      "strd", "\t$src1, $src2, [$base], $offset",
                      "$base = $base_wb", []>;
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
 // STRT, STRBT, and STRHT are for disassembly only.
 
-def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
-                    (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
-                    IndexModeNone, StFrm, IIC_iStore_ru,
-                    "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
-                    [/* For disassembly only; pattern left blank */]> {
+def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+                     IndexModePost, StFrm, IIC_iStore_ru,
+                     "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+                     [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
+  let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
 }
 
-def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
-                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
-                     IndexModeNone, StFrm, IIC_iStore_bh_ru,
-                     "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
-                     [/* For disassembly only; pattern left blank */]> {
+def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+                      IndexModePost, StFrm, IIC_iStore_bh_ru,
+                      "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+                      [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
+  let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
 }
 
-def STRHT: AI3sthpo<(outs GPR:$base_wb),
-                    (ins GPR:$src, GPR:$base,am3offset:$offset),
+def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
                     StMiscFrm, IIC_iStore_bh_ru,
-                    "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+                    "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
                     [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
+  let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
 }
 
 //===----------------------------------------------------------------------===//
@@ -1892,7 +1960,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
     let Inst{21}    = 1;          // Writeback
     let Inst{20}    = L_bit;
   }
-} 
+}
 
 let neverHasSideEffects = 1 in {
 
@@ -1912,16 +1980,10 @@ def : MnemonicAlias<"stm", "stmia">;
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-// FIXME: Should be a pseudo-instruction.
-def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
-                                      reglist:$regs, variable_ops),
-                     IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
-                     "ldmia${p}\t$Rn!, $regs",
-                     "$Rn = $wb", []> {
-  let Inst{24-23} = 0b01;       // Increment After
-  let Inst{21}    = 1;          // Writeback
-  let Inst{20}    = 1;          // Load
-}
+def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                               reglist:$regs, variable_ops),
+                     Size4Bytes, IIC_iLoad_mBr, []>,
+      RegConstraint<"$Rn = $wb">;
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
@@ -1933,6 +1995,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
   bits<4> Rd;
   bits<4> Rm;
 
+  let Inst{19-16} = 0b0000;
   let Inst{11-4} = 0b00000000;
   let Inst{25} = 0;
   let Inst{3-0} = Rm;
@@ -1959,6 +2022,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
   bits<4> Rd;
   bits<12> src;
   let Inst{15-12} = Rd;
+  let Inst{19-16} = 0b0000;
   let Inst{11-0} = src;
   let Inst{25} = 0;
 }
@@ -2145,10 +2209,12 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
                           BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
 
 // ADC and SUBC with 's' bit set.
-defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
-                          BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
-                          BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+let usesCustomInserter = 1 in {
+defm ADCS : AI1_adde_sube_s_irs<
+              BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<
+              BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+}
 
 def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
                  IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
@@ -2190,31 +2256,17 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
 }
 
 // RSB with 's' bit set.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
-                 IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
-                 [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
-  bits<4> Rd;
-  bits<4> Rn;
-  bits<12> imm;
-  let Inst{25} = 1;
-  let Inst{20} = 1;
-  let Inst{15-12} = Rd;
-  let Inst{19-16} = Rn;
-  let Inst{11-0} = imm;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                 DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
-                 [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
-  bits<4> Rd;
-  bits<4> Rn;
-  bits<12> shift;
-  let Inst{25} = 0;
-  let Inst{20} = 1;
-  let Inst{11-0} = shift;
-  let Inst{15-12} = Rd;
-  let Inst{19-16} = Rn;
-}
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                 Size4Bytes, IIC_iALUi,
+                 [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
+def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                 Size4Bytes, IIC_iALUr,
+                 [/* For disassembly only; pattern left blank */]>;
+def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 Size4Bytes, IIC_iALUsr,
+                 [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
 }
 
 let Uses = [CPSR] in {
@@ -2258,34 +2310,14 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
 }
 }
 
-// FIXME: Allow these to be predicated.
-let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-                  DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
-                  [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
-                  Requires<[IsARM]> {
-  bits<4> Rd;
-  bits<4> Rn;
-  bits<12> imm;
-  let Inst{25} = 1;
-  let Inst{20} = 1;
-  let Inst{15-12} = Rd;
-  let Inst{19-16} = Rn;
-  let Inst{11-0} = imm;
-}
-def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                  DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
-                  [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
-                  Requires<[IsARM]> {
-  bits<4> Rd;
-  bits<4> Rn;
-  bits<12> shift;
-  let Inst{25} = 0;
-  let Inst{20} = 1;
-  let Inst{11-0} = shift;
-  let Inst{15-12} = Rd;
-  let Inst{19-16} = Rn;
-}
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1, Uses = [CPSR] in {
+def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                  Size4Bytes, IIC_iALUi,
+                  [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
+def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                  Size4Bytes, IIC_iALUsr,
+                  [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
 }
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
@@ -2300,8 +2332,10 @@ def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
 // The with-carry-in form matches bitwise not instead of the negation.
 // Effectively, the inverse interpretation of the carry flag already accounts
 // for part of the negation.
-def : ARMPat<(adde   GPR:$src, so_imm_not:$imm),
+def : ARMPat<(adde_dead_carry   GPR:$src, so_imm_not:$imm),
              (SBCri  GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(adde_live_carry   GPR:$src, so_imm_not:$imm),
+             (SBCSri GPR:$src, so_imm_not:$imm)>;
 
 // Note: These are implemented in C++ code, because they have to generate
 // ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2617,14 +2651,16 @@ def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
 def MUL  : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                    IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
                    [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
-                   Requires<[IsARM, HasV6]>;
+                   Requires<[IsARM, HasV6]> {
+  let Inst{15-12} = 0b0000;
+}
 }
 
 let Constraints = "@earlyclobber $Rd" in
 def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
                          (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
-                         Size4Bytes, IIC_iMAC32, 
-                         [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, 
+                         Size4Bytes, IIC_iMAC32,
+                         [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
                         Requires<[IsARM, NoV6]> {
   bits<4> Ra;
   let Inst{15-12} = Ra;
@@ -2657,7 +2693,7 @@ let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
 def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                             Size4Bytes, IIC_iMUL64, []>,
                            Requires<[IsARM, NoV6]>;
 
@@ -2681,15 +2717,15 @@ def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
 // Multiply + accumulate
 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
 def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                             Size4Bytes, IIC_iMAC64, []>,
                            Requires<[IsARM, NoV6]>;
 def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                             Size4Bytes, IIC_iMAC64, []>,
                            Requires<[IsARM, NoV6]>;
 def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                             Size4Bytes, IIC_iMAC64, []>,
                            Requires<[IsARM, NoV6]>;
 
@@ -2970,17 +3006,25 @@ def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "revsh", "\t$Rd, $Rm",
                [(set GPR:$Rd,
                   (sext_inreg
-                    (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+                    (or (srl GPR:$Rm, (i32 8)),
                         (shl GPR:$Rm, (i32 8))), i16))]>,
                Requires<[IsARM, HasV6]>;
 
+def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+                               (shl GPR:$Rm, (i32 8))), i16),
+               (REVSH GPR:$Rm)>;
+
+// Need the AddedComplexity or else MOVs + REV would be chosen.
+let AddedComplexity = 5 in
+def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
+
 def lsl_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
   return CurDAG->getTargetConstant(Sh, MVT::i32);
 }]>;
 
-def lsl_amt : PatLeaf<(i32 imm), [{
-  return (N->getZExtValue() < 32);
+def lsl_amt : ImmLeaf<i32, [{
+  return Imm > 0 && Imm < 32;
 }], lsl_shift_imm>;
 
 def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
@@ -3002,8 +3046,8 @@ def asr_shift_imm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(Sh, MVT::i32);
 }]>;
 
-def asr_amt : PatLeaf<(i32 imm), [{
-  return (N->getZExtValue() <= 32);
+def asr_amt : ImmLeaf<i32, [{
+  return Imm > 0 && Imm <= 32;
 }], asr_shift_imm>;
 
 // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
@@ -3119,88 +3163,43 @@ def BCCZi64 : PseudoInst<(outs),
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
-// FIXME: These should all be pseudo-instructions that get expanded to
-//        the normal MOV instructions. That would fix the dependency on
-//        special casing them in tblgen.
 let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
-                IIC_iCMOVr, "mov", "\t$Rd, $Rm",
-      [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $Rd">, UnaryDP {
-  bits<4> Rd;
-  bits<4> Rm;
-  let Inst{25} = 0;
-  let Inst{20} = 0;
-  let Inst{15-12} = Rd;
-  let Inst{11-4} = 0b00000000;
-  let Inst{3-0} = Rm;
-}
-
-def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
-                 (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
-                "mov", "\t$Rd, $shift",
-   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $Rd">, UnaryDP {
-  bits<4> Rd;
-  bits<12> shift;
-  let Inst{25} = 0;
-  let Inst{20} = 0;
-  let Inst{19-16} = 0;
-  let Inst{15-12} = Rd;
-  let Inst{11-0} = shift;
-}
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+                           Size4Bytes, IIC_iCMOVr,
+  [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+      RegConstraint<"$false = $Rd">;
+def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
+                           (ins GPR:$false, so_reg:$shift, pred:$p),
+                           Size4Bytes, IIC_iCMOVsr,
+  [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+      RegConstraint<"$false = $Rd">;
 
 let isMoveImm = 1 in
-def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
-                 DPFrm, IIC_iMOVi,
-                 "movw", "\t$Rd, $imm",
-                 []>,
-                 RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
-                 UnaryDP {
-  bits<4> Rd;
-  bits<16> imm;
-  let Inst{25} = 1;
-  let Inst{20} = 0;
-  let Inst{19-16} = imm{15-12};
-  let Inst{15-12} = Rd;
-  let Inst{11-0}  = imm{11-0};
-}
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+                             (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+                             Size4Bytes, IIC_iMOVi,
+                             []>,
+      RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
 
 let isMoveImm = 1 in
-def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
-                         (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
-                "mov", "\t$Rd, $imm",
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+                           (ins GPR:$false, so_imm:$imm, pred:$p),
+                           Size4Bytes, IIC_iCMOVi,
    [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $Rd">, UnaryDP {
-  bits<4> Rd;
-  bits<12> imm;
-  let Inst{25} = 1;
-  let Inst{20} = 0;
-  let Inst{19-16} = 0b0000;
-  let Inst{15-12} = Rd;
-  let Inst{11-0} = imm;
-}
+      RegConstraint<"$false = $Rd">;
 
 // Two instruction predicate mov immediate.
 let isMoveImm = 1 in
-def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
-                             (ins GPR:$false, i32imm:$src, pred:$p),
-                  IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+                                (ins GPR:$false, i32imm:$src, pred:$p),
+                  Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
 
 let isMoveImm = 1 in
-def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
-                         (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
-                "mvn", "\t$Rd, $imm",
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+                           (ins GPR:$false, so_imm:$imm, pred:$p),
+                           Size4Bytes, IIC_iCMOVi,
  [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $Rd">, UnaryDP {
-  bits<4> Rd;
-  bits<12> imm;
-  let Inst{25} = 1;
-  let Inst{20} = 0;
-  let Inst{19-16} = 0b0000;
-  let Inst{15-12} = Rd;
-  let Inst{11-0} = imm;
-}
+                RegConstraint<"$false = $Rd">;
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
@@ -3221,13 +3220,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
   let Inst{31-4} = 0xf57ff05;
   let Inst{3-0} = opt;
 }
-
-def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
-                       "mcr", "\tp15, 0, $zero, c7, c10, 5",
-                       [(ARMMemBarrierMCR GPR:$zero)]>,
-                       Requires<[IsARM, HasV6]> {
-  // FIXME: add encoding
-}
 }
 
 def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
@@ -3266,6 +3258,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
     def ATOMIC_LOAD_ADD_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -3284,6 +3288,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
     def ATOMIC_LOAD_ADD_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -3302,6 +3318,18 @@ let usesCustomInserter = 1 in {
     def ATOMIC_LOAD_NAND_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
       [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+    def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
 
     def ATOMIC_SWAP_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -3326,39 +3354,26 @@ let usesCustomInserter = 1 in {
 }
 
 let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
-                    "ldrexb", "\t$Rt, [$Rn]",
-                    []>;
-def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
-                    "ldrexh", "\t$Rt, [$Rn]",
-                    []>;
-def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
-                    "ldrex", "\t$Rt, [$Rn]",
-                    []>;
-def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
-                    NoItinerary,
-                    "ldrexd", "\t$Rt, $Rt2, [$Rn]",
-                    []>;
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+                    "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+                    "ldrexh", "\t$Rt, $addr", []>;
+def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+                    "ldrex", "\t$Rt, $addr", []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
+                    NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
 }
 
 let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
-                    NoItinerary,
-                    "strexb", "\t$Rd, $src, [$Rn]",
-                    []>;
-def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
-                    NoItinerary,
-                    "strexh", "\t$Rd, $Rt, [$Rn]",
-                    []>;
-def STREX  : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
-                    NoItinerary,
-                    "strex", "\t$Rd, $Rt, [$Rn]",
-                    []>;
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+                    NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+                    NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX  : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+                    NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
 def STREXD : AIstrex<0b01, (outs GPR:$Rd),
-                    (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
-                    NoItinerary,
-                    "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
-                    []>;
+                    (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
+                    NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
 }
 
 // Clear-Exclusive is for disassembly only.
@@ -3377,238 +3392,7 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
 }
 
 //===----------------------------------------------------------------------===//
-// TLS Instructions
-//
-
-// __aeabi_read_tp preserves the registers r1-r3.
-// This is a pseudo inst so that we can get the encoding right, 
-// complete with fixup for the aeabi_read_tp function.
-let isCall = 1,
-  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
-  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
-               [(set R0, ARMthread_pointer)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// SJLJ Exception handling intrinsics
-//   eh_sjlj_setjmp() is an instruction sequence to store the return
-//   address and save #0 in R0 for the non-longjmp case.
-//   Since by its nature we may be coming from some other function to get
-//   here, and we're using the stack frame for the containing function to
-//   save/restore registers, we can't keep anything live in regs across
-//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-//   when we get here from a longjmp(). We force everthing out of registers
-//   except for our own input by listing the relevant registers in Defs. By
-//   doing so, we also cause the prologue/epilogue code to actively preserve
-//   all of the callee-saved resgisters, which is exactly what we want.
-//   A constant value is passed in $val, and we use the location as a scratch.
-//
-// These are pseudo-instructions and are lowered to individual MC-insts, so
-// no encoding information is necessary.
-let Defs =
-  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
-    D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
-    D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
-    D31 ], hasSideEffects = 1, isBarrier = 1 in {
-  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
-                               NoItinerary,
-                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
-                           Requires<[IsARM, HasVFP2]>;
-}
-
-let Defs =
-  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ],
-  hasSideEffects = 1, isBarrier = 1 in {
-  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
-                                   NoItinerary,
-                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
-                                Requires<[IsARM, NoVFP]>;
-}
-
-// FIXME: Non-Darwin version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
-    Defs = [ R7, LR, SP ] in {
-def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
-                             NoItinerary,
-                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
-                                Requires<[IsARM, IsDarwin]>;
-}
-
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
-// handled when the pseudo is expanded (which happens before any passes
-// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
-            [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
-              Requires<[IsDarwin]>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//
-
-// Large immediate handling.
-
-// 32-bit immediate using two piece so_imms or movw + movt.
-// This is a single pseudo instruction, the benefit is that it can be remat'd
-// as a single unit instead of having to handle reg inputs.
-// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
-def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
-                           [(set GPR:$dst, (arm_i32imm:$src))]>,
-                           Requires<[IsARM]>;
-
-// Pseudo instruction that combines movw + movt + add pc (if PIC).
-// It also makes it possible to rematerialize the instructions.
-// FIXME: Remove this when we can do generalized remat and when machine licm
-// can properly the instructions.
-let isReMaterializable = 1 in {
-def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
-                              IIC_iMOVix2addpc,
-                        [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
-                        Requires<[IsARM, UseMovt]>;
-
-def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
-                             IIC_iMOVix2,
-                        [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
-                        Requires<[IsARM, UseMovt]>;
-
-let AddedComplexity = 10 in
-def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
-                                IIC_iMOVix2ld,
-                    [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
-                    Requires<[IsARM, UseMovt]>;
-} // isReMaterializable
-
-// ConstantPool, GlobalAddress, and JumpTable
-def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
-            Requires<[IsARM, DontUseMovt]>;
-def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
-def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
-            Requires<[IsARM, UseMovt]>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
-             (LEApcrelJT tjumptable:$dst, imm:$id)>;
-
-// TODO: add,sub,and, 3-instr forms?
-
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
-          (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
-          (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
-          (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
-          (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
-          (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
-          (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-// Direct calls
-def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
-      Requires<[IsARM, IsNotDarwin]>;
-def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
-      Requires<[IsARM, IsDarwin]>;
-
-// zextload i1 -> zextload i8
-def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(zextloadi1 ldst_so_reg:$addr),    (LDRBrs ldst_so_reg:$addr)>;
-
-// extload -> zextload
-def : ARMPat<(extloadi1 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(extloadi1 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
-def : ARMPat<(extloadi8 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(extloadi8 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
-
-def : ARMPat<(extloadi16 addrmode3:$addr),  (LDRH addrmode3:$addr)>;
-
-def : ARMPat<(extloadi8  addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
-def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
-
-// smul* and smla*
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
-                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
-                 (SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
-                 (SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
-                      (sra GPR:$b, (i32 16))),
-                 (SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
-                 (SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
-                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
-                 (SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
-                (SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
-                      (i32 16)),
-                 (SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
-                 (SMULWB GPR:$a, GPR:$b)>;
-
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
-                           (sra (shl GPR:$b, (i32 16)), (i32 16)))),
-                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul sext_16_node:$a, sext_16_node:$b)),
-                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
-                           (sra GPR:$b, (i32 16)))),
-                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
-                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul (sra GPR:$a, (i32 16)),
-                           (sra (shl GPR:$b, (i32 16)), (i32 16)))),
-                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
-                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
-                           (i32 16))),
-                 (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
-                      (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
-                 (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-
-//===----------------------------------------------------------------------===//
-// Thumb Support
-//
-
-include "ARMInstrThumb.td"
-
-//===----------------------------------------------------------------------===//
-// Thumb2 Support
-//
-
-include "ARMInstrThumb2.td"
-
-//===----------------------------------------------------------------------===//
-// Floating Point Support
-//
-
-include "ARMInstrVFP.td"
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD (NEON) Support
-//
-
-include "ARMInstrNEON.td"
-
-//===----------------------------------------------------------------------===//
-// Coprocessor Instructions.  For disassembly only.
+// Coprocessor Instructions.
 //
 
 def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3652,17 +3436,18 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
   let Inst{23-20} = opc1;
 }
 
-class ACI<dag oops, dag iops, string opc, string asm>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary,
-      opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+class ACI<dag oops, dag iops, string opc, string asm,
+          IndexMode im = IndexModeNone>
+  : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary,
+         opc, asm, "", [/* For disassembly only; pattern left blank */]> {
   let Inst{27-25} = 0b110;
 }
 
-multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
+multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{
 
   def _OFFSET : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
-      opc, "\tp$cop, cr$CRd, $addr"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 1; // P = 1
     let Inst{21} = 0; // W = 0
@@ -3671,8 +3456,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def _PRE : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
-      opc, "\tp$cop, cr$CRd, $addr!"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 1; // P = 1
     let Inst{21} = 1; // W = 1
@@ -3681,8 +3466,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def _POST : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
-      opc, "\tp$cop, cr$CRd, [$base], $offset"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 0; // P = 0
     let Inst{21} = 1; // W = 1
@@ -3691,8 +3476,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def _OPTION : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option),
-      opc, "\tp$cop, cr$CRd, [$base], $option"> {
+      !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
+            ops),
+      !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 0; // P = 0
     let Inst{23} = 1; // U = 1
@@ -3702,8 +3488,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def L_OFFSET : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
-      !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 1; // P = 1
     let Inst{21} = 0; // W = 0
@@ -3712,8 +3498,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def L_PRE : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
-      !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
+      IndexModePre> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 1; // P = 1
     let Inst{21} = 1; // W = 1
@@ -3722,8 +3509,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def L_POST : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
-      !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+      !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
+      IndexModePost> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 0; // P = 0
     let Inst{21} = 1; // W = 1
@@ -3732,8 +3520,10 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 
   def L_OPTION : ACI<(outs),
-      (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
-      !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> {
+      !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
+            ops),
+      !strconcat(!strconcat(opc, "l"), cond),
+      "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
     let Inst{31-28} = op31_28;
     let Inst{24} = 0; // P = 0
     let Inst{23} = 1; // U = 1
@@ -3743,19 +3533,18 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
   }
 }
 
-defm LDC  : LdStCop<{?,?,?,?}, 1, "ldc">;
-defm LDC2 : LdStCop<0b1111,    1, "ldc2">;
-defm STC  : LdStCop<{?,?,?,?}, 0, "stc">;
-defm STC2 : LdStCop<0b1111,    0, "stc2">;
+defm LDC  : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc",  "${p}">;
+defm LDC2 : LdStCop<0b1111,    1, (ins),         "ldc2", "">;
+defm STC  : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc",  "${p}">;
+defm STC2 : LdStCop<0b1111,    0, (ins),         "stc2", "">;
 
 //===----------------------------------------------------------------------===//
 // Move between coprocessor and ARM core register -- for disassembly only
 //
 
-class MovRCopro<string opc, bit direction>
-  : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
-        GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-        NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+class MovRCopro<string opc, bit direction, dag oops, dag iops>
+  : ABI<0b1110, oops, iops, NoItinerary, opc,
+        "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
         [/* For disassembly only; pattern left blank */]> {
   let Inst{20} = direction;
   let Inst{4} = 1;
@@ -3775,13 +3564,17 @@ class MovRCopro<string opc, bit direction>
   let Inst{19-16} = CRn;
 }
 
-def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
-def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+                    (outs), (ins p_imm:$cop, i32imm:$opc1,
+                                 GPR:$Rt, c_imm:$CRn, c_imm:$CRm,
+                                 i32imm:$opc2)>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+                    (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1,
+                                         c_imm:$CRn, c_imm:$CRm, i32imm:$opc2)>;
 
-class MovRCopro2<string opc, bit direction>
-  : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
-         GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-         NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class MovRCopro2<string opc, bit direction, dag oops, dag iops>
+  : ABXI<0b1110, oops, iops, NoItinerary,
+         !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
          [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{20} = direction;
@@ -3802,8 +3595,14 @@ class MovRCopro2<string opc, bit direction>
   let Inst{19-16} = CRn;
 }
 
-def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>;
-def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>;
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+                      (outs), (ins p_imm:$cop, i32imm:$opc1,
+                                   GPR:$Rt, c_imm:$CRn, c_imm:$CRm,
+                                   i32imm:$opc2)>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+                      (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1,
+                                           c_imm:$CRn, c_imm:$CRm,
+                                           i32imm:$opc2)>;
 
 class MovRRCopro<string opc, bit direction>
   : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3909,3 +3708,241 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask,  so_imm:$a), NoItinerary,
   let Inst{15-12} = 0b1111;
   let Inst{11-0} = a;
 }
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+               [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//   eh_sjlj_setjmp() is an instruction sequence to store the return
+//   address and save #0 in R0 for the non-longjmp case.
+//   Since by its nature we may be coming from some other function to get
+//   here, and we're using the stack frame for the containing function to
+//   save/restore registers, we can't keep anything live in regs across
+//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+//   when we get here from a longjmp(). We force everything out of registers
+//   except for our own input by listing the relevant registers in Defs. By
+//   doing so, we also cause the prologue/epilogue code to actively preserve
+//   all of the callee-saved resgisters, which is exactly what we want.
+//   A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
+    D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
+    D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+    D31 ], hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                               NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                           Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ],
+  hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                                   NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                                Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+                             NoItinerary,
+                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                                Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins), NoItinerary,
+            [(ARMeh_sjlj_dispatchsetup)]>,
+              Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                           [(set GPR:$dst, (arm_i32imm:$src))]>,
+                           Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2addpc,
+                        [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                             IIC_iMOVix2,
+                        [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                                IIC_iMOVix2ld,
+                    [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+                    Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+            Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+            Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+             (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+          (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+          (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+          (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+          (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+          (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+          (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+      Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+      Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr),    (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr),  (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8  addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra GPR:$b, (i32 16))),
+                 (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+                 (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                 (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+                (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                      (i32 16)),
+                 (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+                 (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                           (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul sext_16_node:$a, sext_16_node:$b)),
+                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                           (sra GPR:$b, (i32 16)))),
+                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul (sra GPR:$a, (i32 16)),
+                           (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                           (i32 16))),
+                 (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+                      (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+                 (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+         Requires<[IsARM, HasV6]>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index dc3d63e..e34d69a 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -80,6 +80,12 @@ def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
 def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
 def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
 
+def NEONvbsl      : SDNode<"ARMISD::VBSL",
+                           SDTypeProfile<1, 3, [SDTCisVec<0>,
+                                                SDTCisSameAs<0, 1>,
+                                                SDTCisSameAs<0, 2>,
+                                                SDTCisSameAs<0, 3>]>>;
+
 def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
 
 // VDUPLANE can produce a quad-register result from a double-register source,
@@ -146,10 +152,6 @@ def VLDMQIA
   : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                     IIC_fpLoad_m, "",
                    [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
-def VLDMQDB
-  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
-                    IIC_fpLoad_m, "",
-                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
 
 // Use VSTM to store a Q register as a D register pair.
 // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
@@ -157,10 +159,6 @@ def VSTMQIA
   : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                     IIC_fpStore_m, "",
                    [(store (v2f64 QPR:$src), GPR:$Rn)]>;
-def VSTMQDB
-  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
-                    IIC_fpStore_m, "",
-                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
 
 // Classes for VLD* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
@@ -1801,7 +1799,7 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType Ty, SDNode ShOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (Ty DPR:$Vd),
@@ -1811,7 +1809,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
 }
 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
         [(set (Ty DPR:$Vd),
@@ -1841,7 +1839,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDNode ShOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (ResTy QPR:$Vd),
@@ -1852,7 +1850,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
 }
 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode ShOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
         [(set (ResTy QPR:$Vd),
@@ -1874,7 +1872,7 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 }
 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (Ty DPR:$Vd),
@@ -1885,7 +1883,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
 }
 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (Ty DPR:$Vd),
@@ -1915,7 +1913,7 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (ResTy QPR:$Vd),
@@ -1927,7 +1925,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (ResTy QPR:$Vd),
@@ -1959,7 +1957,7 @@ class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd),
         (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -1972,7 +1970,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType Ty, SDNode MulOp, SDNode ShOp>
-  : N3V<0, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
         (outs DPR:$Vd),
         (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -1994,7 +1992,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                   SDPatternOperator MulOp, SDPatternOperator ShOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd),
         (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -2008,7 +2006,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType ResTy, ValueType OpTy,
                     SDNode MulOp, SDNode ShOp>
-  : N3V<1, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd),
         (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -2069,7 +2067,7 @@ class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
         (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
         OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
@@ -2081,7 +2079,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
         (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
         OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
@@ -2116,7 +2114,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd),
         (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -2129,7 +2127,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd),
         (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
@@ -2164,7 +2162,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set QPR:$Vd,
@@ -2173,7 +2171,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set QPR:$Vd,
@@ -2219,7 +2217,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (ResTy QPR:$Vd),
@@ -2229,7 +2227,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0,
+  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
         (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
         [(set (ResTy QPR:$Vd),
@@ -2288,17 +2286,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
 // Shift by immediate,
 // both double- and quad-register.
 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
-             ValueType Ty, SDNode OpNode>
+             Format f, InstrItinClass itin, Operand ImmTy,
+             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, 0, op4,
-           (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin,
+           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
            [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
-             ValueType Ty, SDNode OpNode>
+             Format f, InstrItinClass itin, Operand ImmTy,
+             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, 1, op4,
-           (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin,
+           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
            [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
 
@@ -2315,9 +2313,9 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 // Narrow shift by immediate.
 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
-             ValueType ResTy, ValueType OpTy, SDNode OpNode>
+             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, op6, op4,
-           (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin,
+           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
            [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                           (i32 imm:$SIMM))))]>;
@@ -2325,16 +2323,18 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 // Shift right by immediate and accumulate,
 // both double- and quad-register.
 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+                Operand ImmTy, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
   : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
-           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
            [(set DPR:$Vd, (Ty (add DPR:$src1,
                                 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+                Operand ImmTy, string OpcodeStr, string Dt,
+                ValueType Ty, SDNode ShOp>
   : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
-           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
            [(set QPR:$Vd, (Ty (add QPR:$src1,
                                 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
@@ -2342,15 +2342,17 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
 // Shift by immediate and insert,
 // both double- and quad-register.
 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+                Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty,SDNode ShOp>
   : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
-           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
+           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
            [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
-                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+                Operand ImmTy, Format f, string OpcodeStr, string Dt,
+                ValueType Ty,SDNode ShOp>
   : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
-           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
+           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
            OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
            [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
 
@@ -3010,40 +3012,77 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
 // Neon 2-register vector shift by immediate,
 //   with f of either N2RegVShLFrm or N2RegVShRFrm
 //   element sizes of 8, 16, 32 and 64 bits:
-multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                     InstrItinClass itin, string OpcodeStr, string Dt,
-                     SDNode OpNode, Format f> {
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
+                       SDNode OpNode> {
+  // 64-bit vector types.
+  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+    let Inst{21-19} = 0b001; // imm6 = 001xxx
+  }
+  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
+  }
+  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+    let Inst{21} = 0b1;      // imm6 = 1xxxxx
+  }
+  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+                             // imm6 = xxxxxx
+
+  // 128-bit vector types.
+  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+    let Inst{21-19} = 0b001; // imm6 = 001xxx
+  }
+  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
+  }
+  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+    let Inst{21} = 0b1;      // imm6 = 1xxxxx
+  }
+  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+                             // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
+                       SDNode OpNode> {
   // 64-bit vector types.
-  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                      OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                      OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                      OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
+  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                      OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
-  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                      OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                      OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                      OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
+  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                      OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                              // imm6 = xxxxxx
 }
@@ -3053,79 +3092,113 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr, string Dt, SDNode ShOp> {
   // 64-bit vector types.
-  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4,
+  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                         OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                         OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                         OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
+  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                         OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
-  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                         OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                         OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                         OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
+  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                         OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                              // imm6 = xxxxxx
 }
 
-
 // Neon Shift-Insert vector operations,
 //   with f of either N2RegVShLFrm or N2RegVShRFrm
 //   element sizes of 8, 16, 32 and 64 bits:
-multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                         string OpcodeStr, SDNode ShOp,
-                         Format f> {
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                          string OpcodeStr> {
+  // 64-bit vector types.
+  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+    let Inst{21-19} = 0b001; // imm6 = 001xxx
+  }
+  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
+  }
+  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+    let Inst{21} = 0b1;      // imm6 = 1xxxxx
+  }
+  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+                             // imm6 = xxxxxx
+
+  // 128-bit vector types.
+  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+    let Inst{21-19} = 0b001; // imm6 = 001xxx
+  }
+  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
+  }
+  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+    let Inst{21} = 0b1;      // imm6 = 1xxxxx
+  }
+  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+                             // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                          string OpcodeStr> {
   // 64-bit vector types.
-  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "8", v8i8, ShOp> {
+  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "16", v4i16, ShOp> {
+  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "32", v2i32, ShOp> {
+  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
-                        f, OpcodeStr, "64", v1i64, ShOp>;
+  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
-  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "8", v16i8, ShOp> {
+  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
-  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "16", v8i16, ShOp> {
+  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
-  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
-                        f, OpcodeStr, "32", v4i32, ShOp> {
+  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
-  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
-                        f, OpcodeStr, "64", v2i64, ShOp>;
+  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                              // imm6 = xxxxxx
 }
 
@@ -3153,15 +3226,18 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                       bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
   def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
-                    OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
+                    OpcodeStr, !strconcat(Dt, "16"),
+                    v8i8, v8i16, shr_imm8, OpNode> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
-                     OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "32"),
+                     v4i16, v4i32, shr_imm16, OpNode> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
-                     OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
+                     OpcodeStr, !strconcat(Dt, "64"),
+                     v2i32, v2i64, shr_imm32, OpNode> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
 }
@@ -3697,16 +3773,21 @@ def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                      N3RegFrm, IIC_VCNTiD,
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
-                     [(set DPR:$Vd,
-                       (v2i32 (or (and DPR:$Vn, DPR:$src1),
-                                  (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+                     [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
+          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+
 def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                      N3RegFrm, IIC_VCNTiQ,
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
-                     [(set QPR:$Vd,
-                       (v4i32 (or (and QPR:$Vn, QPR:$src1),
-                                  (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
+                     [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
+          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
 
 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -3917,14 +3998,13 @@ defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
 defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "u", int_arm_neon_vshiftu>;
+
 //   VSHL     : Vector Shift Left (Immediate)
-defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
-                           N2RegVShLFrm>;
+defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
 //   VSHR     : Vector Shift Right (Immediate)
-defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
-                           N2RegVShRFrm>;
-defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
-                           N2RegVShRFrm>;
+defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
+defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
 
 //   VSHLL    : Vector Shift Left Long
 defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -3957,10 +4037,8 @@ defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vrshl", "u", int_arm_neon_vrshiftu>;
 //   VRSHR    : Vector Rounding Shift Right
-defm VRSHRs   : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
-                           N2RegVShRFrm>;
-defm VRSHRu   : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
-                           N2RegVShRFrm>;
+defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
+defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
 
 //   VRSHRN   : Vector Rounding Shift Right and Narrow
 defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -3974,13 +4052,11 @@ defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqshl", "u", int_arm_neon_vqshiftu>;
 //   VQSHL    : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi  : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
-                           N2RegVShLFrm>;
-defm VQSHLui  : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
-                           N2RegVShLFrm>;
+defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
 //   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu  : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
-                           N2RegVShLFrm>;
+defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
 
 //   VQSHRN   : Vector Saturating Shift Right and Narrow
 defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -4018,9 +4094,10 @@ defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
 defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
 
 //   VSLI     : Vector Shift Left and Insert
-defm VSLI     : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
+defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
 //   VSRI     : Vector Shift Right and Insert
-defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
+defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
 
 // Vector Absolute and Saturating Absolute.
 
@@ -4362,14 +4439,8 @@ def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
 def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
 def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
 
-def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
-                      IIC_VMOVIS, "vdup", "32", "$V, $R",
-                      [(set DPR:$V, (v2f32 (NEONvdup
-                                              (f32 (bitconvert GPR:$R)))))]>;
-def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
-                      IIC_VMOVIS, "vdup", "32", "$V, $R",
-                      [(set QPR:$V, (v4f32 (NEONvdup
-                                              (f32 (bitconvert GPR:$R)))))]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
 
 //   VDUP     : Vector Duplicate Lane (from scalar to all elements)
 
@@ -4397,9 +4468,6 @@ def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
   let Inst{19} = lane{0};
 }
-def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
-  let Inst{19} = lane{0};
-}
 def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
   let Inst{19-17} = lane{2-0};
 }
@@ -4409,9 +4477,12 @@ def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
   let Inst{19} = lane{0};
 }
-def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
-  let Inst{19} = lane{0};
-}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+          (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+          (VDUPLN32q DPR:$Vm, imm:$lane)>;
 
 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
           (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -4426,7 +4497,7 @@ def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
                                     (DSubReg_i32_reg imm:$lane))),
                             (SubReg_i32_lane imm:$lane)))>;
 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
-          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
 
@@ -4517,12 +4588,12 @@ class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
 def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
-def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
 
 def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
-def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
 
 //   VREV32   : Vector Reverse elements within 32-bit words
 
@@ -4628,8 +4699,8 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> {
   let Inst{9-8}    = 0b00;
 }
 def VEXTqf  : VEXTq<"vext", "32", v4f32> {
-  let Inst{11}    = index{0};
-  let Inst{10-8}  = 0b000;
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8}   = 0b00;
 }
 
 //   VTRN     : Vector Transpose
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 826ef46..8c542fe 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -27,22 +27,22 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
 }]>;
 
 /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
-def imm0_7 : PatLeaf<(i32 imm), [{
-  return (uint32_t)N->getZExtValue() < 8;
+def imm0_7 : ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 8;
 }]>;
 def imm0_7_neg : PatLeaf<(i32 imm), [{
   return (uint32_t)-N->getZExtValue() < 8;
 }], imm_neg_XFORM>;
 
-def imm0_255 : PatLeaf<(i32 imm), [{
-  return (uint32_t)N->getZExtValue() < 256;
+def imm0_255 : ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 256;
 }]>;
 def imm0_255_comp : PatLeaf<(i32 imm), [{
   return ~((uint32_t)N->getZExtValue()) < 256;
 }]>;
 
-def imm8_255 : PatLeaf<(i32 imm), [{
-  return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+def imm8_255 : ImmLeaf<i32, [{
+  return Imm >= 8 && Imm < 256;
 }]>;
 def imm8_255_neg : PatLeaf<(i32 imm), [{
   unsigned Val = -N->getZExtValue();
@@ -369,6 +369,15 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
     let Inst{2-0} = 0b000;
   }
 
+  def tBX_Rm : TI<(outs), (ins pred:$p, GPR:$Rm), IIC_Br, "bx${p}\t$Rm",
+                  [/* for disassembly only */]>,
+               T1Special<{1,1,0,?}> {
+    // A6.2.3 & A8.6.25
+    bits<4> Rm;
+    let Inst{6-3} = Rm;
+    let Inst{2-0} = 0b000;
+  }
+
   // Alternative return instruction used by vararg functions.
   def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
                           IIC_Br, "bx\t$Rm",
@@ -712,6 +721,19 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
   let Inst{7-0}  = addr;
 }
 
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For disassembly use only.
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+                       "ldr", "\t$Rt, $addr",
+                       [/* disassembly only */]>,
+                 T1Encoding<{0,1,0,0,1,?}> {
+  // A6.2 & A8.6.59
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0}  = addr;
+}
+
 // A8.6.194 & A8.6.192
 defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
                                 t_addrmode_is4, AddrModeT1_4,
@@ -1175,10 +1197,18 @@ def tREVSH :                    // A8.6.136
                  "revsh", "\t$Rd, $Rm",
                  [(set tGPR:$Rd,
                        (sext_inreg
-                         (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+                         (or (srl tGPR:$Rm, (i32 8)),
                              (shl tGPR:$Rm, (i32 8))), i16))]>,
                  Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
+def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+                            (shl tGPR:$Rm, (i32 8))), i16),
+            (tREVSH tGPR:$Rm)>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
 // Rotate right register
 def tROR :                      // A8.6.139
   T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1322,10 +1352,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
 // Move between coprocessor and ARM core register -- for disassembly only
 //
 
-class tMovRCopro<string opc, bit direction>
-  : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
-                       GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class tMovRCopro<string opc, bit direction, dag oops, dag iops>
+  : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
           [/* For disassembly only; pattern left blank */]> {
   let Inst{27-24} = 0b1110;
   let Inst{20} = direction;
@@ -1346,8 +1374,12 @@ class tMovRCopro<string opc, bit direction>
   let Inst{19-16} = CRn;
 }
 
-def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
-def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+           (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
+                        c_imm:$CRm, i32imm:$opc2)>;
+def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+           (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+                                c_imm:$CRm, i32imm:$opc2)>;
 
 class tMovRRCopro<string opc, bit direction>
   : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
@@ -1420,7 +1452,7 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
 // from some other function to get here, and we're using the stack frame for the
 // containing function to save/restore registers, we can't keep anything live in
 // regs across the eh_sjlj_setjmp(), else it will almost certainly have been
-// tromped upon when we get here from a longjmp(). We force everthing out of
+// tromped upon when we get here from a longjmp(). We force everything out of
 // registers except for our own input by listing the relevant registers in
 // Defs. By doing so, we also cause the prologue/epilogue code to actively
 // preserve all of the callee-saved resgisters, which is exactly what we want.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 0e01be5..600a121 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -44,7 +44,9 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
 // t2_so_imm - Match a 32-bit immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
 // immediate splatted into multiple bytes of the word.
-def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  }]> {
   let EncoderMethod = "getT2SOImmOpValue";
 }
 
@@ -61,49 +63,15 @@ def t2_so_imm_neg : Operand<i32>,
   return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
 }], t2_so_imm_neg_XFORM>;
 
-// Break t2_so_imm's up into two pieces.  This handles immediates with up to 16
-// bits set in them.  This uses t2_so_imm2part to match and t2_so_imm2part_[12]
-// to get the first/second pieces.
-def t2_so_imm2part : Operand<i32>,
-                  PatLeaf<(imm), [{
-      return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue());
-    }]> {
-}
-
-def t2_so_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
-      return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue());
-    }]> {
-}
-
-def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
 /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
-def imm1_31 : PatLeaf<(i32 imm), [{
-  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
+def imm1_31 : ImmLeaf<i32, [{
+  return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
 }]>;
 
 /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
 def imm0_4095 : Operand<i32>,
-                PatLeaf<(i32 imm), [{
-  return (uint32_t)N->getZExtValue() < 4096;
+                ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 4096;
 }]>;
 
 def imm0_4095_neg : PatLeaf<(i32 imm), [{
@@ -118,6 +86,11 @@ def imm0_255_not : PatLeaf<(i32 imm), [{
   return (uint32_t)(~N->getZExtValue()) < 255;
 }], imm_comp_XFORM>;
 
+def lo5AllOne : PatLeaf<(i32 imm), [{
+  // Returns true if all low 5-bits are 1.
+  return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
 // Define Thumb2 specific addressing modes.
 
 // t2addrmode_imm12  := reg + imm12
@@ -129,6 +102,12 @@ def t2addrmode_imm12 : Operand<i32>,
   let ParserMatchClass = MemMode5AsmOperand;
 }
 
+// t2ldrlabel  := imm12
+def t2ldrlabel : Operand<i32> {
+  let EncoderMethod = "getAddrModeImm12OpValue";
+}
+
+
 // ADR instruction labels.
 def t2adrlabel : Operand<i32> {
   let EncoderMethod = "getT2AdrLabelOpValue";
@@ -173,6 +152,15 @@ def t2addrmode_so_reg : Operand<i32>,
   let ParserMatchClass = MemMode5AsmOperand;
 }
 
+// t2addrmode_reg := reg
+// Used by load/store exclusive instructions. Useful to enable right assembly
+// parsing and printing. Not used for any codegen matching.
+//
+def t2addrmode_reg : Operand<i32> {
+  let PrintMethod = "printAddrMode7Operand";
+  let MIOperandInfo = (ops tGPR);
+  let ParserMatchClass = MemMode7AsmOperand;
+}
 
 //===----------------------------------------------------------------------===//
 // Multiclass helpers...
@@ -700,49 +688,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{24-21} = opcod;
    }
 }
+}
 
 // Carry setting variants
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
-                               bit Commutable = 0> {
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2sTwoRegImm<
-                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
-                 opc, "\t$Rd, $Rn, $imm",
-                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
-                 Requires<[IsThumb2]> {
-     let Inst{31-27} = 0b11110;
-     let Inst{25} = 0;
-     let Inst{24-21} = opcod;
-     let Inst{20} = 1; // The S bit.
-     let Inst{15} = 0;
-   }
+   def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+                Size4Bytes, IIC_iALUi,
+                [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
    // register
-   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
-                 opc, ".w\t$Rd, $Rn, $Rm",
-                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
-                 Requires<[IsThumb2]> {
+   def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+                Size4Bytes, IIC_iALUr,
+                [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
      let isCommutable = Commutable;
-     let Inst{31-27} = 0b11101;
-     let Inst{26-25} = 0b01;
-     let Inst{24-21} = opcod;
-     let Inst{20} = 1; // The S bit.
-     let Inst{14-12} = 0b000; // imm3
-     let Inst{7-6} = 0b00; // imm2
-     let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sTwoRegShiftedReg<
-                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
-                 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
-                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
-                 Requires<[IsThumb2]> {
-     let Inst{31-27} = 0b11101;
-     let Inst{26-25} = 0b01;
-     let Inst{24-21} = opcod;
-     let Inst{20} = 1; // The S bit.
-   }
-}
+   def rs : t2PseudoInst<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+                Size4Bytes, IIC_iALUsi,
+                [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
 }
 }
 
@@ -864,6 +830,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
     let Inst{15-12} = Rt;
 
     bits<17> addr;
+    let addr{12}    = 1;           // add = TRUE
     let Inst{19-16} = addr{16-13}; // Rn
     let Inst{23}    = addr{12};    // U
     let Inst{11-0}  = addr{11-0};  // imm
@@ -911,7 +878,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
   }
 
   // FIXME: Is the pci variant actually needed?
-  def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii,
+  def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
                    opc, ".w\t$Rt, $addr",
                    [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
     let isReMaterializable = 1;
@@ -944,6 +911,7 @@ multiclass T2I_st<bits<2> opcod, string opc,
     let Inst{15-12} = Rt;
 
     bits<17> addr;
+    let addr{12}    = 1;           // add = TRUE
     let Inst{19-16} = addr{16-13}; // Rn
     let Inst{23}    = addr{12};    // U
     let Inst{11-0}  = addr{11-0};  // imm
@@ -1398,7 +1366,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
 // for disassembly only.
 // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
-  : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+  : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
           "\t$Rt, $addr", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-25} = 0b00;
@@ -1440,42 +1408,48 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
 def t2STR_PRE  : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
-                         "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+                         "str", "\t$Rt, [$Rn, $addr]!",
+                         "$Rn = $base_wb,@earlyclobber $base_wb",
              [(set GPR:$base_wb,
                    (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
-                          "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+                          "str", "\t$Rt, [$Rn], $addr",
+                          "$Rn = $base_wb,@earlyclobber $base_wb",
              [(set GPR:$base_wb,
                   (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRH_PRE  : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
-                        "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+                        "strh", "\t$Rt, [$Rn, $addr]!",
+                        "$Rn = $base_wb,@earlyclobber $base_wb",
         [(set GPR:$base_wb,
               (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
-                         "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+                         "strh", "\t$Rt, [$Rn], $addr",
+                         "$Rn = $base_wb,@earlyclobber $base_wb",
        [(set GPR:$base_wb,
              (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRB_PRE  : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
-                        "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+                        "strb", "\t$Rt, [$Rn, $addr]!",
+                        "$Rn = $base_wb,@earlyclobber $base_wb",
          [(set GPR:$base_wb,
                (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
                             (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
                             AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
-                         "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+                         "strb", "\t$Rt, [$Rn], $addr",
+                         "$Rn = $base_wb,@earlyclobber $base_wb",
         [(set GPR:$base_wb,
               (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
@@ -1483,7 +1457,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
 // only.
 // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
 class T2IstT<bits<2> type, string opc, InstrItinClass ii>
-  : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+  : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
           "\t$Rt, $addr", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-25} = 0b00;
@@ -1508,20 +1482,20 @@ def t2STRHT  : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
 // ldrd / strd pre / post variants
 // For disassembly only.
 
-def t2LDRD_PRE  : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+def t2LDRD_PRE  : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                  (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
                  "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
 
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                  (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
                  "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
 
 def t2STRD_PRE  : T2Ii8s4<1, 1, 0, (outs),
-                 (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
                  IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
 
 def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
-                 (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
                  IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
 
 // T2Ipl (Preload Data/Instruction) signals the memory system of possible future
@@ -1541,6 +1515,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
     let Inst{15-12} = 0b1111;
 
     bits<17> addr;
+    let addr{12}    = 1;           // add = TRUE
     let Inst{19-16} = addr{16-13}; // Rn
     let Inst{23}    = addr{12};    // U
     let Inst{11-0}  = addr{11-0};  // imm12
@@ -1813,10 +1788,8 @@ defm t2ADC  : T2I_adde_sube_irs<0b1010, "adc",
                           BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
 defm t2SBC  : T2I_adde_sube_irs<0b1011, "sbc",
                           BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
-                          BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
-                          BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
 
 // RSB
 defm t2RSB  : T2I_rbin_irs  <0b1110, "rsb",
@@ -1847,9 +1820,14 @@ def : T2Pat<(addc       rGPR:$src, t2_so_imm_neg:$imm),
 // Effectively, the inverse interpretation of the carry flag already accounts
 // for part of the negation.
 let AddedComplexity = 1 in
-def : T2Pat<(adde       rGPR:$src, imm0_255_not:$imm),
+def : T2Pat<(adde_dead_carry       rGPR:$src, imm0_255_not:$imm),
+            (t2SBCri    rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_dead_carry       rGPR:$src, t2_so_imm_not:$imm),
+            (t2SBCri    rGPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(adde_live_carry       rGPR:$src, imm0_255_not:$imm),
             (t2SBCSri   rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde       rGPR:$src, t2_so_imm_not:$imm),
+def : T2Pat<(adde_live_carry       rGPR:$src, t2_so_imm_not:$imm),
             (t2SBCSri   rGPR:$src, t2_so_imm_not:$imm)>;
 
 // Select Bytes -- for disassembly only
@@ -2052,6 +2030,10 @@ defm t2LSR  : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl  node:$LHS, node:$RHS)>>;
 defm t2ASR  : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra  node:$LHS, node:$RHS)>>;
 defm t2ROR  : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
 
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+          (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
 let Uses = [CPSR] in {
 def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
                    "rrx", "\t$Rd, $Rm",
@@ -2140,10 +2122,12 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
                 IIC_iUNAsi, "bfc", "\t$Rd, $imm",
                 [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
   let Inst{31-27} = 0b11110;
+  let Inst{26} = 0; // should be 0.
   let Inst{25} = 1;
   let Inst{24-20} = 0b10110;
   let Inst{19-16} = 0b1111; // Rn
   let Inst{15} = 0;
+  let Inst{5} = 0; // should be 0.
 
   bits<10> imm;
   let msb{4-0} = imm{9-5};
@@ -2176,9 +2160,11 @@ let Constraints = "$src = $Rd" in {
                   [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
                                    bf_inv_mask_imm:$imm))]> {
     let Inst{31-27} = 0b11110;
+    let Inst{26} = 0; // should be 0.
     let Inst{25} = 1;
     let Inst{24-20} = 0b10110;
     let Inst{15} = 0;
+    let Inst{5} = 0; // should be 0.
 
     bits<10> imm;
     let msb{4-0} = imm{9-5};
@@ -2193,9 +2179,11 @@ let Constraints = "$src = $Rd" in {
                   IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
                   []> {
     let Inst{31-27} = 0b11110;
+    let Inst{26} = 0; // should be 0.
     let Inst{25} = 1;
     let Inst{24-20} = 0b10110;
     let Inst{15} = 0;
+    let Inst{5} = 0; // should be 0.
 
     bits<5> lsbit;
     bits<5> width;
@@ -2607,9 +2595,15 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
                        "revsh", ".w\t$Rd, $Rm",
                  [(set rGPR:$Rd,
                     (sext_inreg
-                      (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+                      (or (srl rGPR:$Rm, (i32 8)),
                           (shl rGPR:$Rm, (i32 8))), i16))]>;
 
+def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+                            (shl rGPR:$Rm, (i32 8))), i16),
+            (t2REVSH rGPR:$Rm)>;
+
+def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
+
 def t2PKHBT : T2ThreeReg<
             (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
                   IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
@@ -2843,9 +2837,9 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Inst{5-4} = opcod;
   let Inst{3-0} = 0b1111;
 
-  bits<4> Rn;
+  bits<4> addr;
   bits<4> Rt;
-  let Inst{19-16} = Rn;
+  let Inst{19-16} = addr;
   let Inst{15-12} = Rt;
 }
 class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -2859,37 +2853,37 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Inst{5-4} = opcod;
 
   bits<4> Rd;
-  bits<4> Rn;
+  bits<4> addr;
   bits<4> Rt;
-  let Inst{11-8}  = Rd;
-  let Inst{19-16} = Rn;
+  let Inst{3-0}  = Rd;
+  let Inst{19-16} = addr;
   let Inst{15-12} = Rt;
 }
 
 let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]",
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
+                         Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr",
                          "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]",
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
+                         Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr",
                          "", []>;
-def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
                        Size4Bytes, NoItinerary,
-                       "ldrex", "\t$Rt, [$Rn]", "",
+                       "ldrex", "\t$Rt, $addr", "",
                       []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-20} = 0b0000101;
   let Inst{11-8} = 0b1111;
   let Inst{7-0} = 0b00000000; // imm8 = 0
 
-  bits<4> Rn;
   bits<4> Rt;
-  let Inst{19-16} = Rn;
+  bits<4> addr;
+  let Inst{19-16} = addr;
   let Inst{15-12} = Rt;
 }
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "ldrexd", "\t$Rt, $Rt2, [$Rn]", "",
+                         "ldrexd", "\t$Rt, $Rt2, $addr", "",
                          [], {?, ?, ?, ?}> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
@@ -2897,31 +2891,31 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
 }
 
 let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
-                         AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexb", "\t$Rd, $Rt, [$Rn]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
-                         AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexh", "\t$Rd, $Rt, [$Rn]", "", []>;
-def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
-                       AddrModeNone, Size4Bytes, NoItinerary,
-                       "strex", "\t$Rd, $Rt, [$Rn]", "",
-                      []> {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+                  AddrModeNone, Size4Bytes, NoItinerary,
+                  "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+                  AddrModeNone, Size4Bytes, NoItinerary,
+                  "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+                  AddrModeNone, Size4Bytes, NoItinerary,
+                  "strex", "\t$Rd, $Rt, $addr", "",
+                  []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-20} = 0b0000100;
   let Inst{7-0} = 0b00000000; // imm8 = 0
 
   bits<4> Rd;
-  bits<4> Rn;
+  bits<4> addr;
   bits<4> Rt;
   let Inst{11-8}  = Rd;
-  let Inst{19-16} = Rn;
+  let Inst{19-16} = addr;
   let Inst{15-12} = Rt;
 }
 def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
-                         (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn),
+                         (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [],
+                         "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
                          {?, ?, ?, ?}> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
@@ -2965,7 +2959,7 @@ let isCall = 1,
 //   here, and we're using the stack frame for the containing function to
 //   save/restore registers, we can't keep anything live in regs across
 //   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-//   when we get here from a longjmp(). We force everthing out of registers
+//   when we get here from a longjmp(). We force everything out of registers
 //   except for our own input by listing the relevant registers in Defs. By
 //   doing so, we also cause the prologue/epilogue code to actively preserve
 //   all of the callee-saved resgisters, which is exactly what we want.
@@ -3238,19 +3232,20 @@ class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
 
   bits<4> Rn;
   let Inst{19-16} = Rn;
+  let Inst{15-0} = 0xc000;
 }
 
 def t2RFEDBW : T2RFE<0b111010000011,
-                   (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+                   (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
                    [/* For disassembly only; pattern left blank */]>;
 def t2RFEDB  : T2RFE<0b111010000001,
-                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn",
+                   (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
                    [/* For disassembly only; pattern left blank */]>;
 def t2RFEIAW : T2RFE<0b111010011011,
-                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+                   (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
                    [/* For disassembly only; pattern left blank */]>;
 def t2RFEIA  : T2RFE<0b111010011001,
-                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+                   (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
                    [/* For disassembly only; pattern left blank */]>;
 
 //===----------------------------------------------------------------------===//
@@ -3352,10 +3347,8 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
 // Move between coprocessor and ARM core register -- for disassembly only
 //
 
-class t2MovRCopro<string opc, bit direction>
-  : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
-                       GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class t2MovRCopro<string opc, bit direction, dag oops, dag iops>
+  : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
           [/* For disassembly only; pattern left blank */]> {
   let Inst{27-24} = 0b1110;
   let Inst{20} = direction;
@@ -3376,8 +3369,12 @@ class t2MovRCopro<string opc, bit direction>
   let Inst{19-16} = CRn;
 }
 
-def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>;
-def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>;
+def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */,
+             (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
+                          c_imm:$CRm, i32imm:$opc2)>;
+def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */,
+             (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+                                  c_imm:$CRm, i32imm:$opc2)>;
 
 class t2MovRRCopro<string opc, bit direction>
   : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 2990283..376bd96 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -101,14 +101,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     let Inst{21}    = 1;          // Writeback
     let Inst{20}    = L_bit;
   }
-  def DDB :
-    AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
-          IndexModeNone, itin,
-          !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
-    let Inst{24-23} = 0b10;       // Decrement Before
-    let Inst{21}    = 0;          // No writeback
-    let Inst{20}    = L_bit;
-  }
   def DDB_UPD :
     AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
           IndexModeUpd, itin_upd,
@@ -143,18 +135,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     // VFP pipelines.
     let D = VFPNeonDomain;
   }
-  def SDB :
-    AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
-          IndexModeNone, itin,
-          !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
-    let Inst{24-23} = 0b10;       // Decrement Before
-    let Inst{21}    = 0;          // No writeback
-    let Inst{20}    = L_bit;
-
-    // Some single precision VFP instructions may be executed on both NEON and
-    // VFP pipelines.
-    let D = VFPNeonDomain;
-  }
   def SDB_UPD :
     AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
           IndexModeUpd, itin_upd,
@@ -467,6 +447,10 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 def VMOVSR : AVConv4I<0b11100000, 0b1010,
@@ -484,6 +468,10 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 let neverHasSideEffects = 1 in {
@@ -503,6 +491,10 @@ def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
   let Inst{19-16} = Rt2;
 
   let Inst{7-6} = 0b00;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
@@ -510,6 +502,10 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                  IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]> {
   let Inst{7-6} = 0b00;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 } // neverHasSideEffects
 
@@ -532,6 +528,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
   let Inst{19-16} = Rt2;
 
   let Inst{7-6}   = 0b00;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 let neverHasSideEffects = 1 in
@@ -540,6 +540,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{7-6} = 0b00;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 // FMRDH: SPR -> GPR
@@ -972,33 +976,15 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
 //
 
 let neverHasSideEffects = 1 in {
-def VMOVDcc  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
-                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
-                    IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+def VMOVDcc  : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+                    Size4Bytes, IIC_fpUNA64,
                     [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
                  RegConstraint<"$Dn = $Dd">;
 
-def VMOVScc  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
-                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
-                    IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+def VMOVScc  : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+                    Size4Bytes, IIC_fpUNA32,
                     [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                  RegConstraint<"$Sn = $Sd">;
-
-def VNEGDcc  : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
-                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
-                    IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
-                    [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
-                 RegConstraint<"$Dn = $Dd">;
-
-def VNEGScc  : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
-                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
-                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
-                    [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
-                 RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and
-  // VFP pipelines on A8.
-  let D = VFPNeonA8Domain;
-}
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index d9dc5cd..df89fad 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -79,7 +79,7 @@ namespace {
       unsigned Position;
       MachineBasicBlock::iterator MBBI;
       bool Merged;
-      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 
+      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                       MachineBasicBlock::iterator i)
         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     };
@@ -174,7 +174,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VLDMSIA;
-    case ARM_AM::db: return ARM::VLDMSDB;
+    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
     }
     break;
   case ARM::VSTRS:
@@ -182,7 +182,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VSTMSIA;
-    case ARM_AM::db: return ARM::VSTMSDB;
+    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
     }
     break;
   case ARM::VLDRD:
@@ -190,7 +190,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VLDMDIA;
-    case ARM_AM::db: return ARM::VLDMDDB;
+    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
     }
     break;
   case ARM::VSTRD:
@@ -198,7 +198,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VSTMDIA;
-    case ARM_AM::db: return ARM::VSTMDDB;
+    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
     }
     break;
   }
@@ -246,13 +246,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
   case ARM::t2LDMDB_UPD:
   case ARM::t2STMDB:
   case ARM::t2STMDB_UPD:
-  case ARM::VLDMSDB:
   case ARM::VLDMSDB_UPD:
-  case ARM::VSTMSDB:
   case ARM::VSTMSDB_UPD:
-  case ARM::VLDMDDB:
   case ARM::VLDMDDB_UPD:
-  case ARM::VSTMDDB:
   case ARM::VSTMDDB_UPD:
     return ARM_AM::db;
 
@@ -312,6 +308,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
     // VLDM/VSTM do not support DB mode without also updating the base reg.
     Mode = ARM_AM::db;
   else if (Offset != 0) {
+    // Check if this is a supported opcode before we insert instructions to
+    // calculate a new base register.
+    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
     // If starting offset isn't zero, insert a MI to materialize a new base.
     // But only do so if it is cost effective, i.e. merging more than two
     // loads / stores.
@@ -354,6 +354,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                 Opcode == ARM::VLDRD);
   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+  if (!Opcode) return false;
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
     .addReg(Base, getKillRegState(BaseKill))
     .addImm(Pred).addReg(PredReg);
@@ -453,6 +454,25 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
   unsigned PRegNum = PMO.isUndef() ? UINT_MAX
     : getARMRegisterNumbering(PReg);
   unsigned Count = 1;
+  unsigned Limit = ~0U;
+
+  // vldm / vstm limit are 32 for S variants, 16 for D variants.
+
+  switch (Opcode) {
+  default: break;
+  case ARM::VSTRS:
+    Limit = 32;
+    break;
+  case ARM::VSTRD:
+    Limit = 16;
+    break;
+  case ARM::VLDRD:
+    Limit = 16;
+    break;
+  case ARM::VLDRS:
+    Limit = 32;
+    break;
+  }
 
   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
     int NewOffset = MemOps[i].Offset;
@@ -460,13 +480,13 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
     unsigned Reg = MO.getReg();
     unsigned RegNum = MO.isUndef() ? UINT_MAX
       : getARMRegisterNumbering(Reg);
-    // Register numbers must be in ascending order.  For VFP, the registers
-    // must also be consecutive and there is a limit of 16 double-word
-    // registers per instruction.
+    // Register numbers must be in ascending order. For VFP / NEON load and
+    // store multiples, the registers must also be consecutive and within the
+    // limit on the number of registers per instruction.
     if (Reg != ARM::SP &&
         NewOffset == Offset + (int)Size &&
-        ((isNotVFP && RegNum > PRegNum)
-         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
+        ((isNotVFP && RegNum > PRegNum) ||
+         ((Count < Limit) && RegNum == PRegNum+1))) {
       Offset += Size;
       PRegNum = RegNum;
       ++Count;
@@ -567,14 +587,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
   case ARM::t2STMIA:
   case ARM::t2STMDB:
   case ARM::VLDMSIA:
-  case ARM::VLDMSDB:
   case ARM::VSTMSIA:
-  case ARM::VSTMSDB:
     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
   case ARM::VLDMDIA:
-  case ARM::VLDMDDB:
   case ARM::VSTMDIA:
-  case ARM::VSTMDDB:
     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
   }
 }
@@ -624,7 +640,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
     }
     break;
   case ARM::VLDMSIA:
-  case ARM::VLDMSDB:
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
@@ -632,7 +647,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
     }
     break;
   case ARM::VLDMDIA:
-  case ARM::VLDMDDB:
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
@@ -640,7 +654,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
     }
     break;
   case ARM::VSTMSIA:
-  case ARM::VSTMSDB:
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
@@ -648,7 +661,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
     }
     break;
   case ARM::VSTMDIA:
-  case ARM::VSTMDDB:
     switch (Mode) {
     default: llvm_unreachable("Unhandled submode!");
     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
@@ -749,7 +761,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
     MIB.addOperand(MI->getOperand(OpNum));
 
   // Transfer memoperands.
-  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
 
   MBB.erase(MBBI);
   return true;
@@ -1275,14 +1287,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
 
-        // Try folding preceeding/trailing base inc/dec into the generated
+        // Try folding preceding/trailing base inc/dec into the generated
         // LDM/STM ops.
         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
             ++NumMerges;
         NumMerges += Merges.size();
 
-        // Try folding preceeding/trailing base inc/dec into those load/store
+        // Try folding preceding/trailing base inc/dec into those load/store
         // that were not merged to form LDM/STM ops.
         for (unsigned i = 0; i != NumMemOps; ++i)
           if (!MemOps[i].Merged)
@@ -1292,7 +1304,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         // RS may be pointing to an instruction that's deleted.
         RS->skipTo(prior(MBBI));
       } else if (NumMemOps == 1) {
-        // Try folding preceeding/trailing base inc/dec into the single
+        // Try folding preceding/trailing base inc/dec into the single
         // load/store.
         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
           ++NumMerges;
@@ -1322,7 +1334,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
 }
 
 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
 /// directly restore the value of LR into pc.
 ///   ldmfd sp!, {..., lr}
 ///   bx lr
@@ -1530,15 +1542,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   // Then make sure the immediate offset fits.
   int OffImm = getMemoryOpOffset(Op0);
   if (isT2) {
-    if (OffImm < 0) {
-      if (OffImm < -255)
-        // Can't fall back to t2LDRi8 / t2STRi8.
-        return false;
-    } else {
-      int Limit = (1 << 8) * Scale;
-      if (OffImm >= Limit || (OffImm & (Scale-1)))
-        return false;
-    }
+    int Limit = (1 << 8) * Scale;
+    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+      return false;
     Offset = OffImm;
   } else {
     ARM_AM::AddrOpc AddSub = ARM_AM::add;
diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp
index 53edfca..a3f89e9 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/ARMMCAsmInfo.cpp
@@ -12,8 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
 using namespace llvm;
 
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+  cl::desc("Generate ARM EHABI tables"),
+  cl::init(false));
+
+
 static const char *const arm_asm_table[] = {
   "{r0}", "r0",
   "{r1}", "r1",
@@ -65,4 +73,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
   DwarfRequiresFrameSection = false;
 
   SupportsDebugInformation = true;
+
+  // Exceptions handling
+  if (EnableARMEHABI)
+    ExceptionsType = ExceptionHandling::ARM;
 }
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
index 6d7b485..10607b1 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -278,6 +278,15 @@ public:
   unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
                                      SmallVectorImpl<MCFixup> &Fixups) const;
 
+  unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+
   unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
                                       unsigned EncodedValue) const;
   unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
@@ -1201,6 +1210,30 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
   return MO.getReg();
 }
 
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  return 64 - MI.getOperand(Op).getImm();
+}
+
 void ARMMCCodeEmitter::
 EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                   SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h
index d42f766..0a2e883 100644
--- a/lib/Target/ARM/ARMMCExpr.h
+++ b/lib/Target/ARM/ARMMCExpr.h
@@ -60,6 +60,9 @@ public:
   bool EvaluateAsRelocatableImpl(MCValue &Res,
                                  const MCAsmLayout *Layout) const;
   void AddValueSymbols(MCAssembler *) const;
+  const MCSection *FindAssociatedSection() const {
+    return getSubExpr()->FindAssociatedSection();
+  }
 
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index ad51bc1..1cba1ba 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -12,26 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
-#include "ARMAddressingModes.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 22d15b5..54bf82a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -70,6 +70,8 @@ def R4  : ARMReg< 4, "r4">,  DwarfRegNum<[4]>;
 def R5  : ARMReg< 5, "r5">,  DwarfRegNum<[5]>;
 def R6  : ARMReg< 6, "r6">,  DwarfRegNum<[6]>;
 def R7  : ARMReg< 7, "r7">,  DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
 def R8  : ARMReg< 8, "r8">,  DwarfRegNum<[8]>;
 def R9  : ARMReg< 9, "r9">,  DwarfRegNum<[9]>;
 def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
@@ -78,6 +80,7 @@ def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
 def SP  : ARMReg<13, "sp">,  DwarfRegNum<[13]>;
 def LR  : ARMReg<14, "lr">,  DwarfRegNum<[14]>;
 def PC  : ARMReg<15, "pc">,  DwarfRegNum<[15]>;
+}
 
 // Float registers
 def S0  : ARMFReg< 0, "s0">;  def S1  : ARMFReg< 1, "s1">;
@@ -99,33 +102,41 @@ def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
 
 // Aliases of the F* registers used to hold 64-bit fp values (doubles)
 let SubRegIndices = [ssub_0, ssub_1] in {
-def D0  : ARMReg< 0,  "d0", [S0,   S1]>;
-def D1  : ARMReg< 1,  "d1", [S2,   S3]>;
-def D2  : ARMReg< 2,  "d2", [S4,   S5]>;
-def D3  : ARMReg< 3,  "d3", [S6,   S7]>;
-def D4  : ARMReg< 4,  "d4", [S8,   S9]>;
-def D5  : ARMReg< 5,  "d5", [S10, S11]>;
-def D6  : ARMReg< 6,  "d6", [S12, S13]>;
-def D7  : ARMReg< 7,  "d7", [S14, S15]>;
-def D8  : ARMReg< 8,  "d8", [S16, S17]>;
-def D9  : ARMReg< 9,  "d9", [S18, S19]>;
-def D10 : ARMReg<10, "d10", [S20, S21]>;
-def D11 : ARMReg<11, "d11", [S22, S23]>;
-def D12 : ARMReg<12, "d12", [S24, S25]>;
-def D13 : ARMReg<13, "d13", [S26, S27]>;
-def D14 : ARMReg<14, "d14", [S28, S29]>;
-def D15 : ARMReg<15, "d15", [S30, S31]>;
+def D0  : ARMReg< 0,  "d0", [S0,   S1]>, DwarfRegNum<[256]>;
+def D1  : ARMReg< 1,  "d1", [S2,   S3]>, DwarfRegNum<[257]>;
+def D2  : ARMReg< 2,  "d2", [S4,   S5]>, DwarfRegNum<[258]>;
+def D3  : ARMReg< 3,  "d3", [S6,   S7]>, DwarfRegNum<[259]>;
+def D4  : ARMReg< 4,  "d4", [S8,   S9]>, DwarfRegNum<[260]>;
+def D5  : ARMReg< 5,  "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6  : ARMReg< 6,  "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7  : ARMReg< 7,  "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8  : ARMReg< 8,  "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9  : ARMReg< 9,  "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
 }
 
 // VFP3 defines 16 additional double registers
-def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
-def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">;
-def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">;
-def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">;
-def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">;
-def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">;
-def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
-def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; 
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; 
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
 
 // Advanced SIMD (NEON) defines 16 quad-word aliases
 let SubRegIndices = [dsub_0, dsub_1],
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 82c6735..49fedf6 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -656,19 +656,19 @@ def CortexA9Itineraries : ProcessorItineraries<
                               [1, 1, 1]>,
   //
   // Single-precision to Integer Move
+  //
+  // On A9 move-from-VFP is free to issue with no stall if other VFP
+  // operations are in flight. I assume it still can't dual-issue though.
   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
-                               InstrStage<1, [A9_MUX0], 0>,
-                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
-                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>],
                               [2, 1]>,
   //
   // Double-precision to Integer Move
+  //
+  // On A9 move-from-VFP is free to issue with no stall if other VFP
+  // operations are in flight. I assume it still can't dual-issue though.
   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
-                               InstrStage<1, [A9_MUX0], 0>,
-                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
-                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_NPipe]>],
+                               InstrStage<1, [A9_MUX0], 0>],
                               [2, 1, 1]>,
   //
   // Single-precision FP Load
@@ -691,20 +691,22 @@ def CortexA9Itineraries : ProcessorItineraries<
                               [2, 1]>,
   //
   // FP Load Multiple
+  // FIXME: assumes 2 doubles which requires 2 LS cycles.
   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Load Multiple + update
+  // FIXME: assumes 2 doubles which requires 2 LS cycles.
   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+                               InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -725,205 +727,206 @@ def CortexA9Itineraries : ProcessorItineraries<
                               [1, 1]>,
   //
   // FP Store Multiple
+  // FIXME: assumes 2 doubles which requires 2 LS cycles.
   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Store Multiple + update
+  // FIXME: assumes 2 doubles which requires 2 LS cycles.
   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                 InstrStage<1, [A9_MUX0], 0>,
                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                 InstrStage<1, [A9_NPipe], 0>,
-                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   // NEON
   // VLD1
-  // FIXME: Conservatively assume insufficent alignment.
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [2, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   // VLD1x2
   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [2, 2, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1, 1]>,
   // VLD1x3
   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [2, 2, 3, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 2, 1]>,
   // VLD1x4
   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [2, 2, 3, 3, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 2, 2, 1]>,
   // VLD1u
   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [2, 2, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 2, 1]>,
   // VLD1x2u
   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [2, 2, 2, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1, 2, 1]>,
   // VLD1x3u
   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [2, 2, 3, 2, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 2, 2, 1]>,
   // VLD1x4u
   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [2, 2, 3, 3, 2, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 2, 2, 2, 1]>,
   //
   // VLD1ln
   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [4, 1, 1, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 1, 1, 1]>,
   //
   // VLD1lnu
   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [4, 2, 1, 1, 1, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 2, 1, 1, 1, 1]>,
   //
   // VLD1dup
   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 1]>,
   //
   // VLD1dupu
   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 2, 1, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 2, 1, 1]>,
   //
   // VLD2
   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 3, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 2, 1]>,
   //
   // VLD2x2
   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [3, 4, 3, 4, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 3, 2, 3, 1]>,
   //
   // VLD2ln
   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [4, 4, 1, 1, 1, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 1, 1, 1, 1]>,
   //
   // VLD2u
   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 3, 2, 1, 1, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 2, 2, 1, 1, 1]>,
   //
   // VLD2x2u
   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [3, 4, 3, 4, 2, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 3, 2, 3, 2, 1]>,
   //
   // VLD2lnu
   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [4, 4, 2, 1, 1, 1, 1, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 2, 1, 1, 1, 1, 1]>,
   //
   // VLD2dup
   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 3, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 2, 1]>,
   //
   // VLD2dupu
   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                              [3, 3, 2, 1, 1]>,
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 2, 2, 1, 1]>,
   //
   // VLD3
   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<4, [A9_NPipe], 0>,
-                               InstrStage<4, [A9_LSUnit]>],
-                              [4, 4, 5, 1]>,
+                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 1]>,
   //
   // VLD3ln
   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -938,10 +941,10 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<4, [A9_NPipe], 0>,
-                               InstrStage<4, [A9_LSUnit]>],
-                              [4, 4, 5, 2, 1]>,
+                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 2, 1]>,
   //
   // VLD3lnu
   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -974,108 +977,108 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<4, [A9_NPipe], 0>,
-                               InstrStage<4, [A9_LSUnit]>],
-                              [4, 4, 5, 5, 1]>,
+                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 4, 1]>,
   //
   // VLD4ln
   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<5, [A9_NPipe], 0>,
-                               InstrStage<5, [A9_LSUnit]>],
-                              [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
   //
   // VLD4u
   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<4, [A9_NPipe], 0>,
-                               InstrStage<4, [A9_LSUnit]>],
-                              [4, 4, 5, 5, 2, 1]>,
+                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 4, 2, 1]>,
   //
   // VLD4lnu
   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<5, [A9_NPipe], 0>,
-                               InstrStage<5, [A9_LSUnit]>],
-                              [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // VLD4dup
   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [3, 3, 4, 4, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 2, 3, 3, 1]>,
   //
   // VLD4dupu
   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
-                              [3, 3, 4, 4, 2, 1, 1]>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 2, 3, 3, 2, 1, 1]>,
   //
   // VST1
   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [1, 1, 1]>,
   //
   // VST1x2
   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [1, 1, 1, 1]>,
   //
   // VST1x3
   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [1, 1, 1, 1, 2]>,
   //
   // VST1x4
   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [1, 1, 1, 1, 2, 2]>,
   //
   // VST1u
   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1]>,
   //
   // VST1x2u
   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1]>,
   //
   // VST1x3u
@@ -1083,44 +1086,44 @@ def CortexA9Itineraries : ProcessorItineraries<
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1, 2]>,
   //
   // VST1x4u
   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // VST1ln
   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [1, 1, 1]>,
   //
   // VST1lnu
   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1]>,
   //
   // VST2
   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [1, 1, 1, 1]>,
   //
   // VST2x2
@@ -1136,9 +1139,9 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1]>,
   //
   // VST2x2u
@@ -1154,36 +1157,36 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<2, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [1, 1, 1, 1]>,
   //
   // VST2lnu
   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1]>,
   //
   // VST3
   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [1, 1, 1, 1, 2]>,
   //
   // VST3u
   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1, 2]>,
   //
   // VST3ln
@@ -1208,36 +1211,36 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [1, 1, 1, 1, 2, 2]>,
   //
   // VST4u
   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // VST4ln
   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [1, 1, 1, 1, 2, 2]>,
   //
   // VST4lnu
   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<3, [A9_NPipe], 0>,
-                               InstrStage<3, [A9_LSUnit]>],
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
                               [2, 1, 1, 1, 1, 1, 2, 2]>,
 
   //
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 2b9202b..aa1e398 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -35,7 +35,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
   // This requires 4-byte alignment.
   if ((Align & 3) != 0)
     return SDValue();
-  // This requires the copy size to be a constant, preferrably
+  // This requires the copy size to be a constant, preferably
   // within a subtarget-specific limit.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   if (!ConstantSize)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 1465984..c6f266b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -38,6 +38,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , ARMFPUType(None)
   , UseNEONForSinglePrecisionFP(false)
   , SlowFPVMLx(false)
+  , HasVMLxForwarding(false)
   , SlowFPBrcc(false)
   , IsThumb(isT)
   , ThumbMode(Thumb1)
@@ -51,6 +52,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , HasT2ExtractPack(false)
   , HasDataBarrier(false)
   , Pref32BitThumb(false)
+  , AvoidCPSRPartialUpdate(false)
   , HasMPExtension(false)
   , FPOnlySP(false)
   , AllowsUnalignedMem(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 76c1c3f..0271c87 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -61,6 +61,10 @@ protected:
   /// whether the FP VML[AS] instructions are slow (if so, don't use them).
   bool SlowFPVMLx;
 
+  /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+  /// forwarding to allow mul + mla being issued back to back.
+  bool HasVMLxForwarding;
+
   /// SlowFPBrcc - True if floating point compare + branch is slow.
   bool SlowFPBrcc;
 
@@ -106,6 +110,11 @@ protected:
   /// over 16-bit ones.
   bool Pref32BitThumb;
 
+  /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+  /// that partially update CPSR and add false dependency on the previous
+  /// CPSR setting instruction.
+  bool AvoidCPSRPartialUpdate;
+
   /// HasMPExtension - True if the subtarget supports Multiprocessing
   /// extension (ARMv7 only).
   bool HasMPExtension;
@@ -182,15 +191,19 @@ protected:
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool hasDataBarrier() const { return HasDataBarrier; }
   bool useFPVMLx() const { return !SlowFPVMLx; }
+  bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
+  bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool hasMPExtension() const { return HasMPExtension; }
 
   bool hasFP16() const { return HasFP16; }
   bool hasD16() const { return HasD16; }
 
-  bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+  const Triple &getTargetTriple() const { return TargetTriple; }
+
+  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
   bool isTargetELF() const { return !isTargetDarwin(); }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 0ee773b..29aa4f7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -22,16 +22,13 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
-
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
-  switch (TheTriple.getOS()) {
-  case Triple::Darwin:
+
+  if (TheTriple.isOSDarwin())
     return new ARMMCAsmInfoDarwin();
-  default:
-    return new ARMELFMCAsmInfo();
-  }
+
+  return new ARMELFMCAsmInfo();
 }
 
 // This is duplicated code. Refactor this.
@@ -41,17 +38,17 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCCodeEmitter *Emitter,
                                     bool RelaxAll,
                                     bool NoExecStack) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin())
     return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
+
+  if (TheTriple.isOSWindows()) {
     llvm_unreachable("ARM does not support Windows COFF format");
     return NULL;
-  default:
-    return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
   }
+
+  return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
 }
 
 extern "C" void LLVMInitializeARMTarget() {
@@ -86,8 +83,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, isThumb),
     JITInfo(),
-    InstrItins(Subtarget.getInstrItineraryData())
-{
+    InstrItins(Subtarget.getInstrItineraryData()) {
   DefRelocModel = getRelocationModel();
 }
 
@@ -149,8 +145,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass(true));
-  if (ExpandMLx &&
-      OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+  if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
     PM.add(createMLxExpansionPass());
 
   return true;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 7535da5..19defa1 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,8 +36,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
                                  ELF::SHF_WRITE |
                                  ELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
+    LSDASection = NULL;
   }
-  
+
   AttributesSection =
     getContext().getELFSection(".ARM.attributes",
                                ELF::SHT_ARM_ATTRIBUTES,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 129af20..29ecc18 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -29,15 +29,6 @@
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-/// Shift types used for register controlled shifts in ARM memory addressing.
-enum ShiftType {
-  Lsl,
-  Lsr,
-  Asr,
-  Ror,
-  Rrx
-};
-
 namespace {
 
 class ARMOperand;
@@ -55,8 +46,10 @@ class ARMAsmParser : public TargetAsmParser {
   int TryParseRegister();
   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
   bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
   bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
-  bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
+                   ARMII::AddrMode AddrMode);
   bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
   bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
   const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
@@ -65,13 +58,14 @@ class ARMAsmParser : public TargetAsmParser {
 
   bool ParseMemoryOffsetReg(bool &Negative,
                             bool &OffsetRegShifted,
-                            enum ShiftType &ShiftType,
+                            enum ARM_AM::ShiftOpc &ShiftType,
                             const MCExpr *&ShiftAmount,
                             const MCExpr *&Offset,
                             bool &OffsetIsReg,
                             int &OffsetRegNum,
                             SMLoc &E);
-  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
+  bool ParseShift(enum ARM_AM::ShiftOpc &St,
+                  const MCExpr *&ShiftAmount, SMLoc &E);
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
   bool ParseDirectiveThumb(SMLoc L);
   bool ParseDirectiveThumbFunc(SMLoc L);
@@ -102,10 +96,25 @@ class ARMAsmParser : public TargetAsmParser {
     SmallVectorImpl<MCParsedAsmOperand*>&);
   OperandMatchResultTy tryParseMSRMaskOperand(
     SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMemMode2Operand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMemMode3Operand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+
+  // Asm Match Converter Methods
+  bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                                  const SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                                  const SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                                  const SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                                  const SmallVectorImpl<MCParsedAsmOperand*> &);
 
 public:
   ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
     : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+      MCAsmParserExtension::Initialize(_Parser);
       // Initialize the set of available features.
       setAvailableFeatures(ComputeAvailableFeatures(
           &TM.getSubtarget<ARMSubtarget>()));
@@ -136,6 +145,7 @@ class ARMOperand : public MCParsedAsmOperand {
     RegisterList,
     DPRRegisterList,
     SPRRegisterList,
+    Shifter,
     Token
   } Kind;
 
@@ -178,13 +188,14 @@ class ARMOperand : public MCParsedAsmOperand {
 
     /// Combined record for all forms of ARM address expressions.
     struct {
+      ARMII::AddrMode AddrMode;
       unsigned BaseRegNum;
       union {
         unsigned RegNum;     ///< Offset register num, when OffsetIsReg.
         const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
       } Offset;
       const MCExpr *ShiftAmount;     // used when OffsetRegShifted is true
-      enum ShiftType ShiftType;      // used when OffsetRegShifted is true
+      enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
       unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
       unsigned Preindexed       : 1;
       unsigned Postindexed      : 1;
@@ -192,6 +203,11 @@ class ARMOperand : public MCParsedAsmOperand {
       unsigned Negative         : 1; // only used when OffsetIsReg is true
       unsigned Writeback        : 1;
     } Mem;
+
+    struct {
+      ARM_AM::ShiftOpc ShiftTy;
+      unsigned RegNum;
+    } Shift;
   };
 
   ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -234,6 +250,10 @@ public:
       break;
     case ProcIFlags:
       IFlags = o.IFlags;
+      break;
+    case Shifter:
+      Shift = o.Shift;
+      break;
     }
   }
 
@@ -290,7 +310,9 @@ public:
 
   /// @name Memory Operand Accessors
   /// @{
-
+  ARMII::AddrMode getMemAddrMode() const {
+    return Mem.AddrMode;
+  }
   unsigned getMemBaseRegNum() const {
     return Mem.BaseRegNum;
   }
@@ -310,7 +332,7 @@ public:
     assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
     return Mem.ShiftAmount;
   }
-  enum ShiftType getMemShiftType() const {
+  enum ARM_AM::ShiftOpc getMemShiftType() const {
     assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
     return Mem.ShiftType;
   }
@@ -334,6 +356,52 @@ public:
   bool isToken() const { return Kind == Token; }
   bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
   bool isMemory() const { return Kind == Memory; }
+  bool isShifter() const { return Kind == Shifter; }
+  bool isMemMode2() const {
+    if (getMemAddrMode() != ARMII::AddrMode2)
+      return false;
+
+    if (getMemOffsetIsReg())
+      return true;
+
+    if (getMemNegative() &&
+        !(getMemPostindexed() || getMemPreindexed()))
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    if (!CE) return false;
+    int64_t Value = CE->getValue();
+
+    // The offset must be in the range 0-4095 (imm12).
+    if (Value > 4095 || Value < -4095)
+      return false;
+
+    return true;
+  }
+  bool isMemMode3() const {
+    if (getMemAddrMode() != ARMII::AddrMode3)
+      return false;
+
+    if (getMemOffsetIsReg()) {
+      if (getMemOffsetRegShifted())
+        return false; // No shift with offset reg allowed
+      return true;
+    }
+
+    if (getMemNegative() &&
+        !(getMemPostindexed() || getMemPreindexed()))
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    if (!CE) return false;
+    int64_t Value = CE->getValue();
+
+    // The offset must be in the range 0-255 (imm8).
+    if (Value > 255 || Value < -255)
+      return false;
+
+    return true;
+  }
   bool isMemMode5() const {
     if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
         getMemNegative())
@@ -346,6 +414,23 @@ public:
     int64_t Value = CE->getValue();
     return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
   }
+  bool isMemMode7() const {
+    if (!isMemory() ||
+        getMemPreindexed() ||
+        getMemPostindexed() ||
+        getMemOffsetIsReg() ||
+        getMemNegative() ||
+        getMemWriteback())
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    if (!CE) return false;
+
+    if (CE->getValue())
+      return false;
+
+    return true;
+  }
   bool isMemModeRegThumb() const {
     if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
       return false;
@@ -402,6 +487,12 @@ public:
     Inst.addOperand(MCOperand::CreateReg(getReg()));
   }
 
+  void addShifterOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(
+      ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+  }
+
   void addRegListOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -428,6 +519,88 @@ public:
     Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
   }
 
+  void addMemMode7Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && isMemMode7() && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    (void)CE;
+    assert((CE || CE->getValue() == 0) &&
+           "No offset operand support in mode 7");
+  }
+
+  void addMemMode2Operands(MCInst &Inst, unsigned N) const {
+    assert(isMemMode2() && "Invalid mode or number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+    unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+    if (getMemOffsetIsReg()) {
+      Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+      ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+      ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
+      int64_t ShiftAmount = 0;
+
+      if (getMemOffsetRegShifted()) {
+        ShOpc = getMemShiftType();
+        const MCConstantExpr *CE =
+                   dyn_cast<MCConstantExpr>(getMemShiftAmount());
+        ShiftAmount = CE->getValue();
+      }
+
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
+                                           ShOpc, IdxMode)));
+      return;
+    }
+
+    // Create a operand placeholder to always yield the same number of operands.
+    Inst.addOperand(MCOperand::CreateReg(0));
+
+    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+    // the difference?
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    assert(CE && "Non-constant mode 2 offset operand!");
+    int64_t Offset = CE->getValue();
+
+    if (Offset >= 0)
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
+                                           Offset, ARM_AM::no_shift, IdxMode)));
+    else
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
+                                          -Offset, ARM_AM::no_shift, IdxMode)));
+  }
+
+  void addMemMode3Operands(MCInst &Inst, unsigned N) const {
+    assert(isMemMode3() && "Invalid mode or number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+    unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+    if (getMemOffsetIsReg()) {
+      Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+      ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
+                                                             IdxMode)));
+      return;
+    }
+
+    // Create a operand placeholder to always yield the same number of operands.
+    Inst.addOperand(MCOperand::CreateReg(0));
+
+    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+    // the difference?
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    assert(CE && "Non-constant mode 3 offset operand!");
+    int64_t Offset = CE->getValue();
+
+    if (Offset >= 0)
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
+                                           Offset, IdxMode)));
+    else
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
+                                           -Offset, IdxMode)));
+  }
+
   void addMemMode5Operands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && isMemMode5() && "Invalid number of operands!");
 
@@ -525,6 +698,15 @@ public:
     return Op;
   }
 
+  static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+                                   SMLoc S, SMLoc E) {
+    ARMOperand *Op = new ARMOperand(Shifter);
+    Op->Shift.ShiftTy = ShTy;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
   static ARMOperand *
   CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
                 SMLoc StartLoc, SMLoc EndLoc) {
@@ -553,9 +735,10 @@ public:
     return Op;
   }
 
-  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
-                               const MCExpr *Offset, int OffsetRegNum,
-                               bool OffsetRegShifted, enum ShiftType ShiftType,
+  static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
+                               bool OffsetIsReg, const MCExpr *Offset,
+                               int OffsetRegNum, bool OffsetRegShifted,
+                               enum ARM_AM::ShiftOpc ShiftType,
                                const MCExpr *ShiftAmount, bool Preindexed,
                                bool Postindexed, bool Negative, bool Writeback,
                                SMLoc S, SMLoc E) {
@@ -571,6 +754,7 @@ public:
            "Cannot have expression offset and register offset!");
 
     ARMOperand *Op = new ARMOperand(Memory);
+    Op->Mem.AddrMode = AddrMode;
     Op->Mem.BaseRegNum = BaseRegNum;
     Op->Mem.OffsetIsReg = OffsetIsReg;
     if (OffsetIsReg)
@@ -642,7 +826,8 @@ void ARMOperand::dump(raw_ostream &OS) const {
     break;
   case Memory:
     OS << "<memory "
-       << "base:" << getMemBaseRegNum();
+       << "am:" << ARMII::AddrModeToString(getMemAddrMode())
+       << " base:" << getMemBaseRegNum();
     if (getMemOffsetIsReg()) {
       OS << " offset:<register " << getMemOffsetRegNum();
       if (getMemOffsetRegShifted()) {
@@ -676,6 +861,9 @@ void ARMOperand::dump(raw_ostream &OS) const {
   case Register:
     OS << "<register " << getReg() << ">";
     break;
+  case Shifter:
+    OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">";
+    break;
   case RegisterList:
   case DPRRegisterList:
   case SPRRegisterList: {
@@ -738,6 +926,42 @@ int ARMAsmParser::TryParseRegister() {
   return RegNum;
 }
 
+/// Try to parse a register name.  The token must be an Identifier when called,
+/// and if it is a register name the token is eaten and the register number is
+/// returned.  Otherwise return -1.
+///
+bool ARMAsmParser::TryParseShiftRegister(
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  std::string upperCase = Tok.getString().str();
+  std::string lowerCase = LowercaseString(upperCase);
+  ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+      .Case("lsl", ARM_AM::lsl)
+      .Case("lsr", ARM_AM::lsr)
+      .Case("asr", ARM_AM::asr)
+      .Case("ror", ARM_AM::ror)
+      .Case("rrx", ARM_AM::rrx)
+      .Default(ARM_AM::no_shift);
+
+  if (ShiftTy == ARM_AM::no_shift)
+    return true;
+
+  Parser.Lex(); // Eat shift-type operand;
+  int RegNum = TryParseRegister();
+  if (RegNum == -1)
+    return Error(Parser.getTok().getLoc(), "register expected");
+
+  Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc()));
+  Operands.push_back(ARMOperand::CreateShifter(ShiftTy,
+                                               S, Parser.getTok().getLoc()));
+
+  return false;
+}
+
+
 /// Try to parse a register name.  The token must be an Identifier when called.
 /// If it's a register, an AsmOperand is created. Another AsmOperand is created
 /// if there is a "writeback". 'true' if it's not a register.
@@ -1046,13 +1270,96 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
+/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+  if (ParseMemory(Operands, ARMII::AddrMode2))
+    return MatchOperand_NoMatch;
+
+  return MatchOperand_Success;
+}
+
+/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+  if (ParseMemory(Operands, ARMII::AddrMode3))
+    return MatchOperand_NoMatch;
+
+  return MatchOperand_Success;
+}
+
+/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+
+  ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+
+  ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
 /// Parse an ARM memory expression, return false if successful else return true
 /// or an error.  The first token must be a '[' when called.
 ///
 /// TODO Only preindexing and postindexing addressing are started, unindexed
 /// with option, etc are still to do.
 bool ARMAsmParser::
-ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+            ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
   SMLoc S, E;
   assert(Parser.getTok().is(AsmToken::LBrac) &&
          "Token is not a Left Bracket");
@@ -1083,7 +1390,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   ARMOperand *WBOp = 0;
   int OffsetRegNum = -1;
   bool OffsetRegShifted = false;
-  enum ShiftType ShiftType = Lsl;
+  enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
   const MCExpr *ShiftAmount = 0;
   const MCExpr *Offset = 0;
 
@@ -1106,10 +1413,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
     const AsmToken &ExclaimTok = Parser.getTok();
     if (ExclaimTok.is(AsmToken::Exclaim)) {
+      // None of addrmode3 instruction uses "!"
+      if (AddrMode == ARMII::AddrMode3)
+        return true;
+
       WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
                                      ExclaimTok.getLoc());
       Writeback = true;
       Parser.Lex(); // Eat exclaim token
+    } else { // In addressing mode 2, pre-indexed mode always end with "!"
+      if (AddrMode == ARMII::AddrMode2)
+        Preindexed = false;
     }
   } else {
     // The "[Rn" we have so far was not followed by a comma.
@@ -1143,13 +1457,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (!OffsetIsReg) {
     if (!Offset)
       Offset = MCConstantExpr::Create(0, getContext());
+  } else {
+    if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
+      Error(E, "shift amount not supported");
+      return true;
+    }
   }
 
-  Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
-                                           OffsetRegNum, OffsetRegShifted,
-                                           ShiftType, ShiftAmount, Preindexed,
-                                           Postindexed, Negative, Writeback,
-                                           S, E));
+  Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
+                                     Offset, OffsetRegNum, OffsetRegShifted,
+                                     ShiftType, ShiftAmount, Preindexed,
+                                     Postindexed, Negative, Writeback, S, E));
   if (WBOp)
     Operands.push_back(WBOp);
 
@@ -1165,7 +1483,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// we return false on success or an error otherwise.
 bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
                                         bool &OffsetRegShifted,
-                                        enum ShiftType &ShiftType,
+                                        enum ARM_AM::ShiftOpc &ShiftType,
                                         const MCExpr *&ShiftAmount,
                                         const MCExpr *&Offset,
                                         bool &OffsetIsReg,
@@ -1226,28 +1544,28 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
 ///   ( lsl | lsr | asr | ror ) , # shift_amount
 ///   rrx
 /// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
-                              SMLoc &E) {
+bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
+                              const MCExpr *&ShiftAmount, SMLoc &E) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return true;
   StringRef ShiftName = Tok.getString();
   if (ShiftName == "lsl" || ShiftName == "LSL")
-    St = Lsl;
+    St = ARM_AM::lsl;
   else if (ShiftName == "lsr" || ShiftName == "LSR")
-    St = Lsr;
+    St = ARM_AM::lsr;
   else if (ShiftName == "asr" || ShiftName == "ASR")
-    St = Asr;
+    St = ARM_AM::asr;
   else if (ShiftName == "ror" || ShiftName == "ROR")
-    St = Ror;
+    St = ARM_AM::ror;
   else if (ShiftName == "rrx" || ShiftName == "RRX")
-    St = Rrx;
+    St = ARM_AM::rrx;
   else
     return true;
   Parser.Lex(); // Eat shift type token.
 
   // Rrx stands alone.
-  if (St == Rrx)
+  if (St == ARM_AM::rrx)
     return false;
 
   // Otherwise, there must be a '#' and a shift amount.
@@ -1286,6 +1604,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
   case AsmToken::Identifier:
     if (!TryParseRegisterWithWriteBack(Operands))
       return false;
+    if (!TryParseShiftRegister(Operands))
+      return false;
+
 
     // Fall though for the Identifier case that is not a register or a
     // special name.
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 78d73d3..bdce2c4 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,6 +18,7 @@
 #include "ARMDisassembler.h"
 #include "ARMDisassemblerCore.h"
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -94,6 +95,9 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
   // As a result, the decoder fails to deocode USAT properly.
   if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
     return ARM::USAT;
+  // As a result, the decoder fails to deocode UQADD16 properly.
+  if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
+    return ARM::UQADD16;
 
   // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
   // As a result, the decoder fails to decode UMULL properly.
@@ -280,6 +284,24 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
   }
 }
 
+// Helper function for special case handling of PLD (literal) and friends.
+// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return Opcode; // Return unmorphed opcode.
+
+  case ARM::t2PLDi8:   case ARM::t2PLDs:
+  case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+  case ARM::t2PLDWs:
+    return ARM::t2PLDi12;
+
+  case ARM::t2PLIi8:   case ARM::t2PLIs:
+    return ARM::t2PLIi12;
+  }
+}
+
 /// decodeThumbSideEffect is a decorator function which can potentially twiddle
 /// the instruction or morph the returned opcode under Thumb2.
 ///
@@ -330,12 +352,27 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
     }
     // --------- Transform End Marker ---------
 
+    unsigned unmorphed = decodeThumbInstruction(insn);
+
     // See, for example, A6.3.7 Load word: Table A6-18 Load word.
     // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
     // before returning it to our caller.
     if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
-        && slice(insn, 19, 16) == 15)
-      return T2Morph2LoadLiteral(decodeThumbInstruction(insn));
+        && slice(insn, 19, 16) == 15) {
+      unsigned morphed = T2Morph2LoadLiteral(unmorphed);
+      if (morphed != unmorphed)
+        return morphed;
+    }
+
+    // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
+    // why we morphed the opcode before returning it to our caller.
+    if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
+        && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
+        && slice(insn, 19, 16) == 15) {
+      unsigned morphed = T2Morph2PLDLiteral(unmorphed);
+      if (morphed != unmorphed)
+        return morphed;
+    }
 
     // One last check for NEON/VFP instructions.
     if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
@@ -375,21 +412,23 @@ bool ARMDisassembler::getInstruction(MCInst &MI,
   Size = 4;
 
   DEBUG({
-      errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+      errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
              << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
              << ")\n";
       showBitVector(errs(), insn);
     });
 
-  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+  OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
   if (!Builder)
     return false;
 
+  Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+                                              getDisInfoBlock(), getMCContext(),
+                                              Address);
+
   if (!Builder->Build(MI, insn))
     return false;
 
-  delete Builder;
-
   return true;
 }
 
@@ -398,7 +437,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
                                        const MemoryObject &Region,
                                        uint64_t Address,
                                        raw_ostream &os) const {
-  // The Thumb instruction stream is a sequence of halhwords.
+  // The Thumb instruction stream is a sequence of halfwords.
 
   // This represents the first halfword as well as the machine instruction
   // passed to decodeThumbInstruction().  For 16-bit Thumb instruction, the top
@@ -463,17 +502,19 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
       showBitVector(errs(), insn);
     });
 
-  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+  OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
   if (!Builder)
     return false;
 
   Builder->SetSession(const_cast<Session *>(&SO));
 
+  Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+                                              getDisInfoBlock(), getMCContext(),
+                                              Address);
+
   if (!Builder->Build(MI, insn))
     return false;
 
-  delete Builder;
-
   return true;
 }
 
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index bac68dd..642829c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -17,6 +17,7 @@
 
 #include "ARMDisassemblerCore.h"
 #include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -82,10 +83,28 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
 // FIXME: Auto-gened?
 static unsigned
 getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
-  // For this purpose, we can treat rGPR as if it were GPR.
-  if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+  if (RegClassID == ARM::rGPRRegClassID) {
+    // Check for The register numbers 13 and 15 that are not permitted for many
+    // Thumb register specifiers.
+    if (RawRegister == 13 || RawRegister == 15) {
+      B->SetErr(-1);
+      return 0;
+    }
+    // For this purpose, we can treat rGPR as if it were GPR.
+    RegClassID = ARM::GPRRegClassID;
+  }
 
   // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+  // A7.3 register encoding
+  //     Qd -> bit[12] == 0
+  //     Qn -> bit[16] == 0
+  //     Qm -> bit[0]  == 0
+  //
+  // If one of these bits is 1, the instruction is UNDEFINED.
+  if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
+    B->SetErr(-1);
+    return 0;
+  }
   unsigned RegNum =
     RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
 
@@ -497,14 +516,66 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
   return false;
 }
 
+// A8.6.94 MLA
+// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
+//
+// A8.6.105 MUL
+// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//
+// A8.6.246 UMULL
+// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+// if dHi == dLo then UNPREDICTABLE;
+static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
+  unsigned R19_16 = slice(insn, 19, 16);
+  unsigned R15_12 = slice(insn, 15, 12);
+  unsigned R11_8  = slice(insn, 11, 8);
+  unsigned R3_0   = slice(insn, 3, 0);
+  switch (Opcode) {
+  default:
+    // Did we miss an opcode?
+    DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!");
+    return false;
+  case ARM::MLA:     case ARM::MLS:     case ARM::SMLABB:  case ARM::SMLABT:
+  case ARM::SMLATB:  case ARM::SMLATT:  case ARM::SMLAWB:  case ARM::SMLAWT:
+  case ARM::SMMLA:   case ARM::SMMLAR:  case ARM::SMMLS:   case ARM::SMMLSR:
+  case ARM::USADA8:
+    if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    return false;
+  case ARM::MUL:     case ARM::SMMUL:   case ARM::SMMULR:
+  case ARM::SMULBB:  case ARM::SMULBT:  case ARM::SMULTB:  case ARM::SMULTT:
+  case ARM::SMULWB:  case ARM::SMULWT:  case ARM::SMUAD:   case ARM::SMUADX:
+  // A8.6.167 SMLAD & A8.6.172 SMLSD
+  case ARM::SMLAD:   case ARM::SMLADX:  case ARM::SMLSD:   case ARM::SMLSDX:
+  case ARM::USAD8:
+    if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    return false;
+  case ARM::SMLAL:   case ARM::SMULL:   case ARM::UMAAL:   case ARM::UMLAL:
+  case ARM::UMULL:
+  case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
+  case ARM::SMLALD:  case ARM::SMLALDX: case ARM::SMLSLD:  case ARM::SMLSLDX:
+    if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    if (R19_16 == R15_12)
+      return true;
+    return false;;
+  }
+}
+
 // Multiply Instructions.
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS:
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
+// SMMLS, SMMLAR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
 //     Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
+// only for {d, n, m}.
 //
-// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT:
+// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
+// SMUADX, and USAD8 (for convenience):
 //     Rd{19-16} Rn{3-0} Rm{11-8}
 //
-// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT:
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
+// SMLALD, SMLADLX, SMLSLD, SMLSLDX:
 //     RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
 //
 // The mapping of the multiply registers to the "regular" ARM registers, where
@@ -531,6 +602,10 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
          && OpInfo[2].RegClass == ARM::GPRRegClassID
          && "Expect three register operands");
 
+  // Sanity check for the register encodings.
+  if (BadRegsMulFrm(Opcode, insn))
+    return false;
+
   // Instructions with two destination registers have RdLo{15-12} first.
   if (NumDefs == 2) {
     assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
@@ -618,18 +693,38 @@ static inline unsigned GetCopOpc(uint32_t insn) {
 static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr");
+  assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
 
   unsigned &OpIdx = NumOpsAdded;
+  // A8.6.92
+  // if coproc == '101x' then SEE "Advanced SIMD and VFP"
+  // But since the special instructions have more explicit encoding bits
+  // specified, if coproc == 10 or 11, we should reject it as invalid.
+  unsigned coproc = GetCoprocessor(insn);
+  if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
+       Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
+      (coproc == 10 || coproc == 11)) {
+    DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
+    return false;
+  }
+
   bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
                     Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+
   // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
   bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
   bool LdStCop = LdStCopOpcode(Opcode);
+  bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
 
   OpIdx = 0;
 
-  MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn)));
+  if (RtOut) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                       decodeRd(insn))));
+    ++OpIdx;
+  }
+  MI.addOperand(MCOperand::CreateImm(coproc));
+  ++OpIdx;
 
   if (LdStCop) {
     // Unindex if P:W = 0b00 --> _OPTION variant
@@ -639,26 +734,34 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
 
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRn(insn))));
+    OpIdx += 2;
 
     if (PW) {
       MI.addOperand(MCOperand::CreateReg(0));
       ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+      const TargetInstrDesc &TID = ARMInsts[Opcode];
+      unsigned IndexMode =
+                  (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
       unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
-                                          ARM_AM::no_shift);
+                                          ARM_AM::no_shift, IndexMode);
       MI.addOperand(MCOperand::CreateImm(Offset));
-      OpIdx = 5;
+      OpIdx += 2;
     } else {
       MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
-      OpIdx = 4;
+      ++OpIdx;
     }
   } else {
     MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
                                                  : GetCopOpc1(insn, NoGPR)));
+    ++OpIdx;
 
-    MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
-                        : MCOperand::CreateReg(
-                            getRegisterEnum(B, ARM::GPRRegClassID,
-                                            decodeRd(insn))));
+    if (!RtOut) {
+      MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
+                          : MCOperand::CreateReg(
+                                getRegisterEnum(B, ARM::GPRRegClassID,
+                                                decodeRd(insn))));
+      ++OpIdx;
+    }
 
     MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
                                 getRegisterEnum(B, ARM::GPRRegClassID,
@@ -667,7 +770,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
 
     MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
 
-    OpIdx = 5;
+    OpIdx += 2;
 
     if (!OneCopOpc) {
       MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
@@ -679,8 +782,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // Branch Instructions.
-// BLr9: SignExtend(Imm24:'00', 32)
-// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
+// BL: SignExtend(Imm24:'00', 32)
+// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
 // SMC: ZeroExtend(imm4, 32)
 // SVC: ZeroExtend(Imm24, 32)
 //
@@ -735,6 +838,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   // MSRi take a mask, followed by one so_imm operand. The mask contains the
   // R Bit in bit 4, and the special register fields in bits 3-0.
   if (Opcode == ARM::MSRi) {
+    // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+    // The hints instructions have more specific encodings, so if mask == 0,
+    // we should reject this as an invalid instruction.
+    if (slice(insn, 19, 16) == 0)
+      return false;
     MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
                                        slice(insn, 19, 16) /* Special Reg */ ));
     // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
@@ -760,11 +868,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     return true;
   }
 
-  assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred
+  assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
           || Opcode == ARM::SMC || Opcode == ARM::SVC) &&
          "Unexpected Opcode");
 
-  assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected");
+  assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
 
   int Imm32 = 0;
   if (Opcode == ARM::SMC) {
@@ -778,12 +886,6 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned Imm26 = slice(insn, 23, 0) << 2;
     //Imm32 = signextend<signed int, 26>(Imm26);
     Imm32 = SignExtend32<26>(Imm26);
-
-    // When executing an ARM instruction, PC reads as the address of the current
-    // instruction plus 8.  The assembler subtracts 8 from the difference
-    // between the branch instruction and the target address, disassembler has
-    // to add 8 to compensate.
-    Imm32 += 8;
   }
 
   MI.addOperand(MCOperand::CreateImm(Imm32));
@@ -793,7 +895,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // Misc. Branch Instructions.
-// BLXr9, BXr9
+// BLX, BLXi, BX
 // BX, BX_RET
 static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -809,8 +911,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
     return true;
 
-  // BLXr9 and BX take one GPR reg.
-  if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) {
+  // BLX and BX take one GPR reg.
+  if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
+      Opcode == ARM::BX) {
     assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
            "Reg operand expected");
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -819,6 +922,17 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     return true;
   }
 
+  // BLXi takes imm32 (the PC offset).
+  if (Opcode == ARM::BLXi) {
+    assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+    // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
+    unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
+    int Imm32 = SignExtend32<26>(Imm26);
+    MI.addOperand(MCOperand::CreateImm(Imm32));
+    OpIdx = 1;
+    return true;
+  }
+
   return false;
 }
 
@@ -837,6 +951,24 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
   return true;
 }
 
+// Standard data-processing instructions allow PC as a register specifier,
+// but we should reject other DPFrm instructions with PC as registers.
+static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
+  switch (Opcode) {
+  default:
+    // Did we miss an opcode?
+    if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
+      DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
+      return true;
+    }
+  case ARM::ADCrr:  case ARM::ADDSrr: case ARM::ADDrr:  case ARM::ANDrr:
+  case ARM::BICrr:  case ARM::CMNzrr: case ARM::CMPrr:  case ARM::EORrr:
+  case ARM::ORRrr:  case ARM::RSBrr:  case ARM::RSCrr:  case ARM::SBCrr:
+  case ARM::SUBSrr: case ARM::SUBrr:  case ARM::TEQrr:  case ARM::TSTrr:
+    return false;
+  }
+}
+
 // A major complication is the fact that some of the saturating add/subtract
 // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
 // They are QADD, QDADD, QDSUB, and QSUB.
@@ -864,6 +996,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   // Special-case handling of BFC/BFI/SBFX/UBFX.
   if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
+    // A8.6.17 BFC & A8.6.18 BFI
+    // Sanity check Rd.
+    if (decodeRd(insn) == 15)
+      return false;
     MI.addOperand(MCOperand::CreateReg(0));
     if (Opcode == ARM::BFI) {
       MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -879,6 +1015,9 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     return true;
   }
   if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
+    // Sanity check Rd and Rm.
+    if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+      return false;
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
     MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
@@ -915,15 +1054,21 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
     // routed here as well.
     // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
+    if (BadRegsDPFrm(Opcode, insn))
+      return false;
     MI.addOperand(MCOperand::CreateReg(
                     getRegisterEnum(B, ARM::GPRRegClassID,
                                     RmRn? decodeRn(insn) : decodeRm(insn))));
     ++OpIdx;
   } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
+    // These two instructions don't allow d as 15.
+    if (decodeRd(insn) == 15)
+      return false;
     // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
     assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
     unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
-    MI.addOperand(MCOperand::CreateImm(Imm16));
+    if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
+      MI.addOperand(MCOperand::CreateImm(Imm16));
     ++OpIdx;
   } else {
     // We have a reg/imm form.
@@ -992,6 +1137,21 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRm(insn))));
   if (Rs) {
+    // If Inst{7} != 0, we should reject this insn as an invalid encoding.
+    if (slice(insn, 7, 7))
+      return false;
+
+    // A8.6.3 ADC (register-shifted register)
+    // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+    // 
+    // This also accounts for shift instructions (register) where, fortunately,
+    // Inst{19-16} = 0b0000.
+    // A8.6.89 LSL (register)
+    // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+    if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
+        decodeRm(insn) == 15 || decodeRs(insn) == 15)
+      return false;
+    
     // Register-controlled shifts: [Rm, Rs, shift].
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRs(insn))));
@@ -1015,6 +1175,71 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   return true;
 }
 
+static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack,
+                           bool Imm) {
+  const StringRef Name = ARMInsts[Opcode].Name;
+  unsigned Rt = decodeRd(insn);
+  unsigned Rn = decodeRn(insn);
+  unsigned Rm = decodeRm(insn);
+  unsigned P  = getPBit(insn);
+  unsigned W  = getWBit(insn);
+
+  if (Store) {
+    // Only STR (immediate, register) allows PC as the source.
+    if (Name.startswith("STRB") && Rt == 15) {
+      DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+      return true;
+    }
+    if (WBack && (Rn == 15 || Rn == Rt)) {
+      DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+      return true;
+    }
+    if (!Imm && Rm == 15) {
+      DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+      return true;
+    }
+  } else {
+    // Only LDR (immediate, register) allows PC as the destination.
+    if (Name.startswith("LDRB") && Rt == 15) {
+      DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+      return true;
+    }
+    if (Imm) {
+      // Immediate
+      if (Rn == 15) {
+        // The literal form must be in offset mode; it's an encoding error
+        // otherwise.
+        if (!(P == 1 && W == 0)) {
+          DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
+          return true;
+        }
+        // LDRB (literal) does not allow PC as the destination.
+        if (Opcode != ARM::LDRi12 && Rt == 15) {
+          DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+          return true;
+        }
+      } else {
+        // Write back while Rn == Rt does not make sense.
+        if (WBack && (Rn == Rt)) {
+          DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+          return true;
+        }
+      }
+    } else {
+      // Register
+      if (Rm == 15) {
+        DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+        return true;
+      }
+      if (WBack && (Rn == 15 || Rn == Rt)) {
+        DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
 
@@ -1077,19 +1302,41 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (OpIdx + 1 >= NumOps)
     return false;
 
-  assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
-         (OpInfo[OpIdx+1].RegClass < 0) &&
-         "Expect 1 reg operand followed by 1 imm operand");
+  if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
+    return false;
 
   ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+  unsigned IndexMode =
+               (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
   if (getIBit(insn) == 0) {
-    MI.addOperand(MCOperand::CreateReg(0));
+    // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
+    // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
+    // been populated.
+    if (isPrePost) {
+      MI.addOperand(MCOperand::CreateReg(0));
+      OpIdx += 1;
+    }
 
-    // Disassemble the 12-bit immediate offset.
     unsigned Imm12 = slice(insn, 11, 0);
-    unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift);
-    MI.addOperand(MCOperand::CreateImm(Offset));
+    if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
+        Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
+      // Disassemble the 12-bit immediate offset, which is the second operand in
+      // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).    
+      int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
+      MI.addOperand(MCOperand::CreateImm(Offset));
+    } else {
+      // Disassemble the 12-bit immediate offset, which is the second operand in
+      // $am2offset => (ops GPR, i32imm).
+      unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
+                                          IndexMode);
+      MI.addOperand(MCOperand::CreateImm(Offset));
+    }
+    OpIdx += 1;
   } else {
+    // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
+    if (slice(insn,4,4) == 1)
+      return false;
+
     // Disassemble the offset reg (Rm), shift type, and immediate shift length.
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
@@ -1101,9 +1348,9 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     // A8.4.1.  Possible rrx or shift amount of 32...
     getImmShiftSE(ShOp, ShImm);
     MI.addOperand(MCOperand::CreateImm(
-                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
+    OpIdx += 2;
   }
-  OpIdx += 2;
 
   return true;
 }
@@ -1125,7 +1372,7 @@ static bool HasDualReg(unsigned Opcode) {
   case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
   case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
     return true;
-  }  
+  }
 }
 
 static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
@@ -1153,8 +1400,6 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     ++OpIdx;
   }
 
-  bool DualReg = HasDualReg(Opcode);
-
   // Disassemble the dst/src operand.
   if (OpIdx >= NumOps)
     return false;
@@ -1165,8 +1410,8 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
                                                      decodeRd(insn))));
   ++OpIdx;
 
-  // Fill in LDRD and STRD's second operand.
-  if (DualReg) {
+  // Fill in LDRD and STRD's second operand Rt operand.
+  if (HasDualReg(Opcode)) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRd(insn) + 1)));
     ++OpIdx;
@@ -1188,7 +1433,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
          "Reg operand expected");
   assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
-         && "Index mode or tied_to operand expected");
+         && "Offset mode or tied_to operand expected");
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRn(insn))));
   ++OpIdx;
@@ -1204,19 +1449,22 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
          "Expect 1 reg operand followed by 1 imm operand");
 
   ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+  unsigned IndexMode =
+                  (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
   if (getAM3IBit(insn) == 1) {
     MI.addOperand(MCOperand::CreateReg(0));
 
     // Disassemble the 8-bit immediate offset.
     unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
     unsigned Imm4L = insn & 0xF;
-    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L);
+    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
+                                        IndexMode);
     MI.addOperand(MCOperand::CreateImm(Offset));
   } else {
     // Disassemble the offset reg (Rm).
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
-    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0);
+    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
     MI.addOperand(MCOperand::CreateImm(Offset));
   }
   OpIdx += 2;
@@ -1236,13 +1484,13 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // The algorithm for disassembly of LdStMulFrm is different from others because
-// it explicitly populates the two predicate operands after operand 0 (the base)
-// and operand 1 (the AM4 mode imm).  After operand 3, we need to populate the
-// reglist with each affected register encoded as an MCOperand.
+// it explicitly populates the two predicate operands after the base register.
+// After that, we need to populate the reglist with each affected register
+// encoded as an MCOperand.
 static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
+  assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
   NumOpsAdded = 0;
 
   unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
@@ -1260,8 +1508,10 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   MI.addOperand(MCOperand::CreateReg(Base));
 
   // Handling the two predicate operands before the reglist.
-  int64_t CondVal = insn >> ARMII::CondShift;
-  MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+  int64_t CondVal = getCondField(insn);
+  if (CondVal == 0xF)
+    return false;
+  MI.addOperand(MCOperand::CreateImm(CondVal));
   MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
 
   NumOpsAdded += 3;
@@ -1352,6 +1602,12 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
 
+  // Sanity check the registers, which should not be 15.
+  if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+    return false;
+  if (ThreeReg && decodeRn(insn) == 15)
+    return false;
+
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRd(insn))));
   ++OpIdx;
@@ -1376,7 +1632,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
     if (Opcode == ARM::PKHBT)
       Opc = ARM_AM::lsl;
-    else if (Opcode == ARM::PKHBT)
+    else if (Opcode == ARM::PKHTB)
       Opc = ARM_AM::asr;
     getImmShiftSE(Opc, ShiftAmt);
     MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
@@ -1391,6 +1647,11 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
+  // A8.6.183 SSAT
+  // if d == 15 || n == 15 then UNPREDICTABLE;
+  if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+    return false;
+
   const TargetInstrDesc &TID = ARMInsts[Opcode];
   NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
 
@@ -1429,6 +1690,11 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
+  // A8.6.220 SXTAB
+  // if d == 15 || m == 15 then UNPREDICTABLE;
+  if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+    return false;
+
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
@@ -1611,7 +1877,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 // A8.6.295 vcvt (floating-point <-> integer)
 // Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
 // FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
-// 
+//
 // A8.6.297 vcvt (floating-point and fixed-point)
 // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
 static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
@@ -1800,15 +2066,14 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // VFP Load/Store Multiple Instructions.
-// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM4 mode imm) is followed by two predicate operands.  It is
-// followed by a reglist of either DPR(s) or SPR(s).
+// We have an optional write back reg, the base, and two predicate operands.
+// It is then followed by a reglist of either DPR(s) or SPR(s).
 //
 // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
 static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5");
+  assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
 
   unsigned &OpIdx = NumOpsAdded;
 
@@ -1827,25 +2092,18 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   MI.addOperand(MCOperand::CreateReg(Base));
 
-  // Next comes the AM4 Opcode.
-  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
-  // Must be either "ia" or "db" submode.
-  if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
-    DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
-    return false;
-  }
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
   // Handling the two predicate operands before the reglist.
-  int64_t CondVal = insn >> ARMII::CondShift;
-  MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+  int64_t CondVal = getCondField(insn);
+  if (CondVal == 0xF)
+    return false;
+  MI.addOperand(MCOperand::CreateImm(CondVal));
   MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
 
-  OpIdx += 4;
+  OpIdx += 3;
 
-  bool isSPVFP = (Opcode == ARM::VLDMSIA     || Opcode == ARM::VLDMSDB     ||
+  bool isSPVFP = (Opcode == ARM::VLDMSIA     ||
                   Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
-                  Opcode == ARM::VSTMSIA     || Opcode == ARM::VSTMSDB     ||
+                  Opcode == ARM::VSTMSIA     ||
                   Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
   unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
 
@@ -1855,6 +2113,11 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   // Fill the variadic part of reglist.
   unsigned char Imm8 = insn & 0xFF;
   unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
+
+  // Apply some sanity checks before proceeding.
+  if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
+    return false;
+
   for (unsigned i = 0; i < Regs; ++i) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
                                                        RegD + i)));
@@ -2136,7 +2399,7 @@ static unsigned decodeN3VImm(uint32_t insn) {
 // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
 static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
-    BO B) {
+    unsigned alignment, BO B) {
 
   const TargetInstrDesc &TID = ARMInsts[Opcode];
   const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2180,9 +2443,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
 
     assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
            OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+    // addrmode6 := (ops GPR:$addr, i32imm)
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        Rn)));
-    MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+    MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
     OpIdx += 2;
 
     if (WB) {
@@ -2230,9 +2494,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
 
     assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
            OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+    // addrmode6 := (ops GPR:$addr, i32imm)
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        Rn)));
-    MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+    MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
     OpIdx += 2;
 
     if (WB) {
@@ -2263,6 +2528,92 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
   return true;
 }
 
+// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
+static bool Align4OneLaneInst(unsigned elem, unsigned size,
+    unsigned index_align, unsigned & alignment) {
+  unsigned bits = 0;
+  switch (elem) {
+  default:
+    return false;
+  case 1:
+    // A8.6.308
+    if (size == 0)
+      return slice(index_align, 0, 0) == 0;
+    else if (size == 1) {
+      bits = slice(index_align, 1, 0);
+      if (bits != 0 && bits != 1)
+        return false;
+      if (bits == 1)
+        alignment = 16;
+      return true;
+    } else if (size == 2) {
+      bits = slice(index_align, 2, 0);
+      if (bits != 0 && bits != 3)
+        return false;
+      if (bits == 3)
+        alignment = 32;
+      return true;;
+    }
+    return true;
+  case 2:
+    // A8.6.311
+    if (size == 0) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 16;
+      return true;
+    } if (size == 1) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 32;
+      return true;
+    } else if (size == 2) {
+      if (slice(index_align, 1, 1) != 0)
+        return false;
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 64;
+      return true;;
+    }
+    return true;
+  case 3:
+    // A8.6.314
+    if (size == 0) {
+      if (slice(index_align, 0, 0) != 0)
+        return false;
+      return true;
+    } if (size == 1) {
+      if (slice(index_align, 0, 0) != 0)
+        return false;
+      return true;
+      return true;
+    } else if (size == 2) {
+      if (slice(index_align, 1, 0) != 0)
+        return false;
+      return true;;
+    }
+    return true;
+  case 4:
+    // A8.6.317
+    if (size == 0) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 32;
+      return true;
+    } if (size == 1) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 64;
+      return true;
+    } else if (size == 2) {
+      bits = slice(index_align, 1, 0);
+      if (bits == 3)
+        return false;
+      if (bits == 1)
+        alignment = 64;
+      else if (bits == 2)
+        alignment = 128;
+      return true;;
+    }
+    return true;
+  }
+}
+
 // A7.7
 // If L (Inst{21}) == 0, store instructions.
 // Find out about double-spaced-ness of the Opcode and pass it on to
@@ -2272,11 +2623,33 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   const StringRef Name = ARMInsts[Opcode].Name;
   bool DblSpaced = false;
+  // 0 represents standard alignment, i.e., unaligned data access.
+  unsigned alignment = 0;
+
+  unsigned elem = 0; // legal values: {1, 2, 3, 4}
+  if (Name.startswith("VST1") || Name.startswith("VLD1"))
+    elem = 1;
+
+  if (Name.startswith("VST2") || Name.startswith("VLD2"))
+    elem = 2;
+
+  if (Name.startswith("VST3") || Name.startswith("VLD3"))
+    elem = 3;
+
+  if (Name.startswith("VST4") || Name.startswith("VLD4"))
+    elem = 4;
 
   if (Name.find("LN") != std::string::npos) {
     // To one lane instructions.
     // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).
 
+    // Utility function takes number of elements, size, and index_align.
+    if (!Align4OneLaneInst(elem,
+                           slice(insn, 11, 10),
+                           slice(insn, 7, 4),
+                           alignment))
+      return false;
+
     // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
     if (Name.endswith("16") || Name.endswith("16_UPD"))
       DblSpaced = slice(insn, 5, 5) == 1;
@@ -2284,30 +2657,102 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
     // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
     if (Name.endswith("32") || Name.endswith("32_UPD"))
       DblSpaced = slice(insn, 6, 6) == 1;
-
+  } else if (Name.find("DUP") != std::string::npos) {
+    // Single element (or structure) to all lanes.
+    // Inst{9-8} encodes the number of element(s) in the structure, with:
+    // 0b00 (VLD1DUP) (for this, a bit makes sense only for data size 16 and 32.
+    // 0b01 (VLD2DUP)
+    // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0)
+    // 0b11 (VLD4DUP)
+    //
+    // Inst{7-6} encodes the data size, with:
+    // 0b00 => 8, 0b01 => 16, 0b10 => 32
+    //
+    // Inst{4} (the a bit) encodes the align action (0: standard alignment)
+    unsigned elem = slice(insn, 9, 8) + 1;
+    unsigned a = slice(insn, 4, 4);
+    if (elem != 3) {
+      // 0b11 is not a valid encoding for Inst{7-6}.
+      if (slice(insn, 7, 6) == 3)
+        return false;
+      unsigned data_size = 8 << slice(insn, 7, 6);
+      // For VLD1DUP, a bit makes sense only for data size of 16 and 32.
+      if (a && data_size == 8)
+        return false;
+
+      // Now we can calculate the alignment!
+      if (a)
+        alignment = elem * data_size;
+    } else {
+      if (a) {
+        // A8.6.315 VLD3 (single 3-element structure to all lanes)
+        // The a bit must be encoded as 0.
+        return false;
+      }
+    }
   } else {
     // Multiple n-element structures with type encoded as Inst{11-8}.
     // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
 
-    // n == 2 && type == 0b1001 -> DblSpaced = true
-    if (Name.startswith("VST2") || Name.startswith("VLD2"))
-      DblSpaced = slice(insn, 11, 8) == 9;
-    
-    // n == 3 && type == 0b0101 -> DblSpaced = true
-    if (Name.startswith("VST3") || Name.startswith("VLD3"))
-      DblSpaced = slice(insn, 11, 8) == 5;
-    
-    // n == 4 && type == 0b0001 -> DblSpaced = true
-    if (Name.startswith("VST4") || Name.startswith("VLD4"))
-      DblSpaced = slice(insn, 11, 8) == 1;
-    
+    // Inst{5-4} encodes alignment.
+    unsigned align = slice(insn, 5, 4);
+    switch (align) {
+    default:
+      break;
+    case 1:
+      alignment = 64; break;
+    case 2:
+      alignment = 128; break;
+    case 3:
+      alignment = 256; break;
+    }
+
+    unsigned type = slice(insn, 11, 8);
+    // Reject UNDEFINED instructions based on type and align.
+    // Plus set DblSpaced flag where appropriate.
+    switch (elem) {
+    default:
+      break;
+    case 1:
+      // n == 1
+      // A8.6.307 & A8.6.391
+      if ((type == 7  && slice(align, 1, 1) == 1) ||
+          (type == 10 && align == 3) ||
+          (type == 6  && slice(align, 1, 1) == 1))
+        return false;
+      break;
+    case 2:
+      // n == 2 && type == 0b1001 -> DblSpaced = true
+      // A8.6.310 & A8.6.393
+      if ((type == 8 || type == 9) && align == 3)
+        return false;
+      DblSpaced = (type == 9);
+      break;
+    case 3:
+      // n == 3 && type == 0b0101 -> DblSpaced = true
+      // A8.6.313 & A8.6.395
+      if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
+        return false;
+      DblSpaced = (type == 5);
+      break;
+    case 4:
+      // n == 4 && type == 0b0001 -> DblSpaced = true
+      // A8.6.316 & A8.6.397
+      if (slice(insn, 7, 6) == 3)
+        return false;
+      DblSpaced = (type == 1);
+      break;
+    }
   }
   return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
-                           slice(insn, 21, 21) == 0, DblSpaced, B);
+                           slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
 }
 
 // VMOV (immediate)
 //   Qd/Dd imm
+// VBIC (immediate)
+// VORR (immediate)
+//   Qd/Dd imm src(=Qd/Dd)
 static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -2334,12 +2779,20 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
   case ARM::VMOVv8i16:
   case ARM::VMVNv4i16:
   case ARM::VMVNv8i16:
+  case ARM::VBICiv4i16:
+  case ARM::VBICiv8i16:
+  case ARM::VORRiv4i16:
+  case ARM::VORRiv8i16:
     esize = ESize16;
     break;
   case ARM::VMOVv2i32:
   case ARM::VMOVv4i32:
   case ARM::VMVNv2i32:
   case ARM::VMVNv4i32:
+  case ARM::VBICiv2i32:
+  case ARM::VBICiv4i32:
+  case ARM::VORRiv2i32:
+  case ARM::VORRiv4i32:
     esize = ESize32;
     break;
   case ARM::VMOVv1i64:
@@ -2347,7 +2800,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
     esize = ESize64;
     break;
   default:
-    assert(0 && "Unreachable code!");
+    assert(0 && "Unexpected opcode!");
     return false;
   }
 
@@ -2356,6 +2809,16 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
   MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
 
   NumOpsAdded = 2;
+
+  // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
+  if (NumOps >= 3 &&
+      (OpInfo[2].RegClass == ARM::DPRRegClassID ||
+       OpInfo[2].RegClass == ARM::QPRRegClassID)) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+                                                     decodeNEONRd(insn))));
+    NumOpsAdded += 1;
+  }
+
   return true;
 }
 
@@ -2376,7 +2839,7 @@ enum N2VFlag {
 //
 // Vector Move Long:
 //   Qd Dm
-// 
+//
 // Vector Move Narrow:
 //   Dd Qm
 //
@@ -2518,7 +2981,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
   assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
 
   // Add the imm operand.
-  
+
   // VSHLL has maximum shift count as the imm, inferred from its size.
   unsigned Imm;
   switch (Opcode) {
@@ -2631,7 +3094,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
   // N3RegFrm.
   if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ)
     return true;
-  
+
   // Dm = Inst{5:3-0} => NEON Rm
   // or
   // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
@@ -2770,7 +3233,7 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   ElemSize esize =
     Opcode == ARM::VGETLNi32 ? ESize32
       : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
-                                                                : ESize32);
+                                                                : ESize8);
 
   // Rt = Inst{15-12} => ARM Rd
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2852,17 +3315,6 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   return true;
 }
 
-// A8.6.41 DMB
-// A8.6.42 DSB
-// A8.6.49 ISB
-static inline bool MemBarrierInstr(uint32_t insn) {
-  unsigned op7_4 = slice(insn, 7, 4);
-  if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
-    return true;
-
-  return false;
-}
-
 static inline bool PreLoadOpcode(unsigned Opcode) {
   switch(Opcode) {
   case ARM::PLDi12:  case ARM::PLDrs:
@@ -2878,8 +3330,8 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
   // Preload Data/Instruction requires either 2 or 3 operands.
-  // PLDi, PLDWi, PLIi:                addrmode_imm12
-  // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg
+  // PLDi12, PLDWi12, PLIi12: addrmode_imm12
+  // PLDrs, PLDWrs, PLIrs:    ldst_so_reg
 
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRn(insn))));
@@ -2888,10 +3340,19 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
       || Opcode == ARM::PLIi12) {
     unsigned Imm12 = slice(insn, 11, 0);
     bool Negative = getUBit(insn) == 0;
+
+    // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12.
+    if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
+      DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
+      MI.setOpcode(ARM::PLDi12);
+    }
+    
     // -0 is represented specially. All other values are as normal.
+    int Offset = Negative ? -1 * Imm12 : Imm12;
     if (Imm12 == 0 && Negative)
-      Imm12 = INT32_MIN;
-    MI.addOperand(MCOperand::CreateImm(Imm12));
+      Offset = INT32_MIN;
+
+    MI.addOperand(MCOperand::CreateImm(Offset));
     NumOpsAdded = 2;
   } else {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2917,14 +3378,20 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  if (MemBarrierInstr(insn)) {
-    // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
-    // of within the generic ARMBasicMCBuilder::BuildIt() method.
-    //
+  if (Opcode == ARM::DMB || Opcode == ARM::DSB) {
     // Inst{3-0} encodes the memory barrier option for the variants.
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
-    NumOpsAdded = 1;
-    return true;
+    unsigned opt = slice(insn, 3, 0);
+    switch (opt) {
+    case ARM_MB::SY:  case ARM_MB::ST:
+    case ARM_MB::ISH: case ARM_MB::ISHST:
+    case ARM_MB::NSH: case ARM_MB::NSHST:
+    case ARM_MB::OSH: case ARM_MB::OSHST:
+      MI.addOperand(MCOperand::CreateImm(opt));
+      NumOpsAdded = 1;
+      return true;
+    default:
+      return false;
+    }
   }
 
   switch (Opcode) {
@@ -2936,6 +3403,11 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   case ARM::WFI:
   case ARM::SEV:
     return true;
+  case ARM::SWP:
+  case ARM::SWPB:
+    // SWP, SWPB: Rd Rm Rn
+    // Delegate to DisassembleLdStExFrm()....
+    return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
   default:
     break;
   }
@@ -2950,20 +3422,32 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   // opcodes which match the same real instruction. This is needed since there's
   // no current handling of optional arguments. Fix here when a better handling
   // of optional arguments is implemented.
-  if (Opcode == ARM::CPS3p) {
+  if (Opcode == ARM::CPS3p) {   // M = 1
+    // Let's reject these impossible imod values by returning false:
+    // 1. (imod=0b01)
+    //
+    // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an
+    // invalid combination, so we just check for imod=0b00 here.
+    if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+      return false;
     MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
     MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6)));   // iflags
     MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));   // mode
     NumOpsAdded = 3;
     return true;
   }
-  if (Opcode == ARM::CPS2p) {
+  if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
+    // Let's reject these impossible imod values by returning false:
+    // 1. (imod=0b00,M=0)
+    // 2. (imod=0b01)
+    if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+      return false;
     MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
     MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6)));   // iflags
     NumOpsAdded = 2;
     return true;
   }
-  if (Opcode == ARM::CPS1p) {
+  if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
     MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
     NumOpsAdded = 1;
     return true;
@@ -3142,7 +3626,7 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
 
   return false;
 }
-  
+
 /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
 /// the possible Predicate and SBitModifier, to build the remaining MCOperand
 /// constituents.
@@ -3154,6 +3638,7 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
   const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   const std::string &Name = ARMInsts[Opcode].Name;
   unsigned Idx = MI.getNumOperands();
+  uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
 
   // First, we check whether this instr specifies the PredicateOperand through
   // a pair of TargetOperandInfos with isPredicate() property.
@@ -3173,14 +3658,23 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
         // like ARM.
         //
         // A8.6.16 B
-        if (Name == "t2Bcc")
-          MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22))));
-        else if (Name == "tBcc")
-          MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8))));
-        else
+        // Check for undefined encodings.
+        unsigned cond;
+        if (Name == "t2Bcc") {
+          if ((cond = slice(insn, 25, 22)) >= 14)
+            return false;
+          MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+        } else if (Name == "tBcc") {
+          if ((cond = slice(insn, 11, 8)) == 14)
+            return false;
+          MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+        } else
           MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
       } else {
         // ARM instructions get their condition field from Inst{31-28}.
+        // We should reject Inst{31-28} = 0b1111 as invalid encoding.
+        if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
+          return false;
         MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
       }
     }
@@ -3243,3 +3737,84 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
   return new ARMBasicMCBuilder(Opcode, Format,
                                ARMInsts[Opcode].getNumOperands());
 }
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
+/// operand in place of the immediate Value in the MCInst.  The immediate
+/// Value has had any PC adjustment made by the caller.  If the getOpInfo()
+/// function was set as part of the setupBuilderForSymbolicDisassembly() call
+/// then that function is called to get any symbolic information at the
+/// builder's Address for this instrution.  If that returns non-zero then the
+/// symbolic information it returns is used to create an MCExpr and that is
+/// added as an operand to the MCInst.  This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
+                                                 uint64_t InstSize,
+                                                 MCInst &MI) {
+  if (!GetOpInfo)
+    return false;
+
+  struct LLVMOpInfo1 SymbolicOp;
+  SymbolicOp.Value = Value;
+  if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
+    return false;
+
+  const MCExpr *Add = NULL;
+  if (SymbolicOp.AddSymbol.Present) {
+    if (SymbolicOp.AddSymbol.Name) {
+      StringRef Name(SymbolicOp.AddSymbol.Name);
+      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+      Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+    } else {
+      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+    }
+  }
+
+  const MCExpr *Sub = NULL;
+  if (SymbolicOp.SubtractSymbol.Present) {
+    if (SymbolicOp.SubtractSymbol.Name) {
+      StringRef Name(SymbolicOp.SubtractSymbol.Name);
+      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+      Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+    } else {
+      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+    }
+  }
+
+  const MCExpr *Off = NULL;
+  if (SymbolicOp.Value != 0)
+    Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+  const MCExpr *Expr;
+  if (Sub) {
+    const MCExpr *LHS;
+    if (Add)
+      LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+    else
+      LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+    else
+      Expr = LHS;
+  } else if (Add) {
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+    else
+      Expr = Add;
+  } else {
+    if (Off != 0)
+      Expr = Off;
+    else
+      Expr = MCConstantExpr::Create(0, *Ctx);
+  }
+
+  if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+    MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+  else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+    MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+  else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+    MI.addOperand(MCOperand::CreateExpr(Expr));
+  else 
+    assert("bad SymbolicOp.VariantKind");
+
+  return true;
+}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 9c30d33..a7ba141 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -22,12 +22,17 @@
 #define ARMDISASSEMBLERCORE_H
 
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm-c/Disassembler.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMRegisterInfo.h"
 #include "ARMDisassembler.h"
 
 namespace llvm {
+class MCContext;
 
 class ARMUtils {
 public:
@@ -134,6 +139,31 @@ static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
   Bits |= (Val & Mask) << To;
 }
 
+// Return an integer result equal to the number of bits of x that are ones.
+static inline uint32_t
+BitCount (uint64_t x)
+{
+    // c accumulates the total bits set in x
+    uint32_t c;
+    for (c = 0; x; ++c)
+    {
+        x &= x - 1; // clear the least significant bit set
+    }
+    return c;
+}
+
+static inline bool
+BitIsSet (const uint64_t value, const uint64_t bit)
+{
+    return (value & (1ull << bit)) != 0;
+}
+
+static inline bool
+BitIsClear (const uint64_t value, const uint64_t bit)
+{
+    return (value & (1ull << bit)) == 0;
+}
+
 /// Various utilities for checking the target specific flags.
 
 /// A unary data processing instruction doesn't have an Rn operand.
@@ -141,6 +171,12 @@ static inline bool isUnaryDP(uint64_t TSFlags) {
   return (TSFlags & ARMII::UnaryDP);
 }
 
+/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
+static inline bool isNEONDomain(uint64_t TSFlags) {
+  return (TSFlags & ARMII::DomainNEON) ||
+         (TSFlags & ARMII::DomainNEONA8);
+}
+
 /// This four-bit field describes the addressing mode used.
 /// See also ARMBaseInstrInfo.h.
 static inline unsigned getAddrMode(uint64_t TSFlags) {
@@ -196,7 +232,7 @@ private:
 public:
   ARMBasicMCBuilder(ARMBasicMCBuilder &B)
     : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
-      SP(B.SP) {
+      SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
     Err = 0;
   }
 
@@ -255,6 +291,44 @@ private:
     assert(SP);
     return slice(SP->ITState, 7, 4);
   }
+
+private:
+  //
+  // Hooks for symbolic disassembly via the public 'C' interface.
+  //
+  // The function to get the symbolic information for operands.
+  LLVMOpInfoCallback GetOpInfo;
+  // The pointer to the block of symbolic information for above call back.
+  void *DisInfo;
+  // The assembly context for creating symbols and MCExprs in place of
+  // immediate operands when there is symbolic information.
+  MCContext *Ctx;
+  // The address of the instruction being disassembled.
+  uint64_t Address;
+
+public:
+  void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
+                                          void *disInfo, MCContext *ctx,
+                                          uint64_t address) {
+    GetOpInfo = getOpInfo;
+    DisInfo = disInfo;
+    Ctx = ctx;
+    Address = address;
+  }
+
+  uint64_t getBuilderAddress() const { return Address; }
+
+  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
+  /// operand in place of the immediate Value in the MCInst.  The immediate
+  /// Value has had any PC adjustment made by the caller.  If the getOpInfo()
+  /// function was set as part of the setupBuilderForSymbolicDisassembly() call
+  /// then that function is called to get any symbolic information at the
+  /// builder's Address for this instrution.  If that returns non-zero then the
+  /// symbolic information it returns is used to create an MCExpr and that is
+  /// added as an operand to the MCInst.  This function returns true if it adds
+  /// an operand to the MCInst and false otherwise.
+  bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
+
 };
 
 } // namespace llvm
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 23372e0..8d39982 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -108,6 +108,8 @@ static inline bool IsGPR(unsigned RegClass) {
 
 // Utilities for 32-bit Thumb instructions.
 
+static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
+
 // Extract imm4: Inst{19-16}.
 static inline unsigned getImm4(uint32_t insn) {
   return slice(insn, 19, 16);
@@ -398,9 +400,17 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
     assert(OpInfo[OpIdx].RegClass < 0 &&
            !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
            && "Pure imm operand expected");
-    MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
-                                             : (Imm3 ? getT1Imm3(insn)
-                                                     : getT1Imm5(insn))));
+    unsigned Imm = 0;
+    if (UseRt)
+      Imm = getT1Imm8(insn);
+    else if (Imm3)
+      Imm = getT1Imm3(insn);
+    else {
+      Imm = getT1Imm5(insn);
+      ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
+      getImmShiftSE(ShOp, Imm);
+    }
+    MI.addOperand(MCOperand::CreateImm(Imm));
   }
   ++OpIdx;
 
@@ -469,6 +479,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
 // tBX_RET: 0 operand
 // tBX_RET_vararg: Rm
 // tBLXr_r9: Rm
+// tBRIND: Rm
 static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -476,11 +487,17 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (NumOps == 0)
     return true;
 
-  // BX/BLX has 1 reg operand: Rm.
-  if (NumOps == 1) {
+  // BX/BLX/tBRIND (indirect branch, i.e, mov pc, Rm) has 1 reg operand: Rm.
+  if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX_Rm || Opcode==ARM::tBRIND) {
+    if (Opcode != ARM::tBRIND) {
+      // Handling the two predicate operands before the reg operand.
+      if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+        return false;
+      NumOpsAdded += 2;
+    }
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        getT1Rm(insn))));
-    NumOpsAdded = 1;
+    NumOpsAdded += 1;
     return true;
   }
 
@@ -598,7 +615,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
 
 // A6.2.4 Load/store single data item
 //
-// Load/Store Register (reg|imm):      tRd tRn imm5 tRm
+// Load/Store Register (reg|imm):      tRd tRn imm5|tRm
 // Load Register Signed Byte|Halfword: tRd tRn tRm
 static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -607,11 +624,6 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
   const TargetOperandInfo *OpInfo = TID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
-  // Table A6-5 16-bit Thumb Load/store instructions
-  // opA = 0b0101 for STR/LDR (register) and friends.
-  // Otherwise, we have STR/LDR (immediate) and friends.
-  bool Imm5 = (opA != 5);
-
   assert(NumOps >= 2
          && OpInfo[0].RegClass == ARM::tGPRRegClassID
          && OpInfo[1].RegClass == ARM::tGPRRegClassID
@@ -624,28 +636,28 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
                                                      getT1tRn(insn))));
   OpIdx = 2;
 
-  // We have either { imm5, tRm } or { tRm } remaining.
-  // Process the imm5 first.  Note that STR/LDR (register) should skip the imm5
-  // offset operand for t_addrmode_s[1|2|4].
+  // We have either { imm5 } or { tRm } remaining.
+  // Note that STR/LDR (register) should skip the imm5 offset operand for
+  // t_addrmode_s[1|2|4].
 
   assert(OpIdx < NumOps && "More operands expected");
 
   if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
       !OpInfo[OpIdx].isOptionalDef()) {
-
-    MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
+    // Table A6-5 16-bit Thumb Load/store instructions
+    // opA = 0b0101 for STR/LDR (register) and friends.
+    // Otherwise, we have STR/LDR (immediate) and friends.
+    assert(opA != 5 && "Immediate operand expected for this opcode");
+    MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
+    ++OpIdx;
+  } else {
+    // The next reg operand is tRm, the offset.
+    assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+           && "Thumb reg operand expected");
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+                                                       getT1tRm(insn))));
     ++OpIdx;
   }
-
-  // The next reg operand is tRm, the offset.
-  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
-         && "Thumb reg operand expected");
-  MI.addOperand(MCOperand::CreateReg(
-                  Imm5 ? 0
-                       : getRegisterEnum(B, ARM::tGPRRegClassID,
-                                         getT1tRm(insn))));
-  ++OpIdx;
-
   return true;
 }
 
@@ -895,6 +907,10 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
   }
 
   unsigned RegListBits = slice(insn, 7, 0);
+  if (BitCount(RegListBits) < 1) {
+    DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
+    return false;
+  }
 
   // Fill the variadic part of reglist.
   for (unsigned i = 0; i < 8; ++i)
@@ -945,6 +961,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
                                       : (int)Imm8));
 
   // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier().
+  // But note that for tBcc, if cond = '1110' then UNDEFINED.
+  if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
+    DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
+    return false;
+  }
   NumOpsAdded = 1;
 
   return true;
@@ -965,11 +986,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   unsigned Imm11 = getT1Imm11(insn);
 
-  // When executing a Thumb instruction, PC reads as the address of the current
-  // instruction plus 4.  The assembler subtracts 4 from the difference between
-  // the branch instruction and the target address, disassembler has to add 4 to
-  // to compensate.
-  MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4));
+  MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
 
   NumOpsAdded = 1;
 
@@ -1129,8 +1146,12 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
 // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
 static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRn(insn))));
+  unsigned Rn = decodeRn(insn);
+  if (Rn == 15) {
+    DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+    return false;
+  }
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
   NumOpsAdded = 1;
   return true;
 }
@@ -1149,7 +1170,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
           Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
           Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
          && "Unexpected opcode");
-  assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
+  assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
 
   NumOpsAdded = 0;
 
@@ -1203,45 +1224,79 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
   OpIdx = 0;
 
   assert(NumOps >= 2
-         && OpInfo[0].RegClass == ARM::GPRRegClassID
-         && OpInfo[1].RegClass == ARM::GPRRegClassID
+         && OpInfo[0].RegClass > 0
+         && OpInfo[1].RegClass > 0
          && "Expect >=2 operands and first two as reg operands");
 
   bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
   bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
   bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
 
+  unsigned Rt  = decodeRd(insn);
+  unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
+  unsigned Rd  = decodeRm(insn);
+  unsigned Rn  = decodeRn(insn);
+
+  // Some sanity checking first.
+  if (isStore) {
+    // if d == n || d == t then UNPREDICTABLE
+    // if d == n || d == t || d == t2 then UNPREDICTABLE
+    if (isDW) {
+      if (Rd == Rn || Rd == Rt || Rd == Rt2) {
+        DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
+        return false;
+      }
+    } else {
+      if (isSW) {
+        if (Rt2 == Rn || Rt2 == Rt) {
+          DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+          return false;
+        }
+      } else {
+        if (Rd == Rn || Rd == Rt) {
+          DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+          return false;
+        }
+      }
+    }
+  } else {
+    // Load
+    // A8.6.71 LDREXD
+    // if t == t2 then UNPREDICTABLE
+    if (isDW && Rt == Rt2) {
+      DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+      return false;
+    }
+  }
+
   // Add the destination operand for store.
   if (isStore) {
     MI.addOperand(MCOperand::CreateReg(
-                    getRegisterEnum(B, ARM::GPRRegClassID,
-                                    isSW ? decodeRs(insn) : decodeRm(insn))));
+                    getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+                                    isSW ? Rt2 : Rd)));
     ++OpIdx;
   }
 
   // Source operand for store and destination operand for load.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+                                                     Rt)));
   ++OpIdx;
 
   // Thumb2 doubleword complication: with an extra source/destination operand.
   if (isDW) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRs(insn))));
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+                                                       Rt2)));
     ++OpIdx;
   }
 
   // Finally add the pointer operand.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRn(insn))));
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+                                                     Rn)));
   ++OpIdx;
 
   return true;
 }
 
-// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm.
-// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA.
-//
 // t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
 // t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
 // t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
@@ -1255,18 +1310,50 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
   if (!OpInfo) return false;
 
   assert(NumOps >= 4
-         && OpInfo[0].RegClass == ARM::GPRRegClassID
-         && OpInfo[1].RegClass == ARM::GPRRegClassID
-         && OpInfo[2].RegClass == ARM::GPRRegClassID
+         && OpInfo[0].RegClass > 0
+         && OpInfo[0].RegClass == OpInfo[1].RegClass
+         && OpInfo[2].RegClass > 0
          && OpInfo[3].RegClass < 0
          && "Expect >= 4 operands and first 3 as reg operands");
 
+  // Thumnb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1).
+  unsigned Rt  = decodeRd(insn);
+  unsigned Rt2 = decodeRs(insn);
+  unsigned Rn  = decodeRn(insn);
+
+  // Some sanity checking first.
+
+  // A8.6.67 LDRD (literal) has its W bit as (0).
+  if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) {
+    if (Rn == 15 && slice(insn, 21, 21) != 0)
+      return false;
+  } else {
+    // For Dual Store, PC cannot be used as the base register.
+    if (Rn == 15) {
+      DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+      return false;
+    }
+  }
+  if (Rt == Rt2) {
+    DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+    return false;
+  }
+  if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
+    if (Rn == Rt || Rn == Rt2) {
+      DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
+      return false;
+    }
+  }
+
   // Add the <Rt> <Rt2> operands.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+  unsigned RegClassPair = OpInfo[0].RegClass;
+  unsigned RegClassBase = OpInfo[2].RegClass;
+  
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
                                                      decodeRd(insn))));
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
                                                      decodeRs(insn))));
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
                                                      decodeRn(insn))));
 
   // Finally add (+/-)imm8*4, depending on the U bit.
@@ -1394,9 +1481,12 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
       && !OpInfo[OpIdx].isOptionalDef()) {
 
-    if (Thumb2ShiftOpcode(Opcode))
-      MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn)));
-    else {
+    if (Thumb2ShiftOpcode(Opcode)) {
+      unsigned Imm = getShiftAmtBits(insn);
+      ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
+      getImmShiftSE(ShOp, Imm);
+      MI.addOperand(MCOperand::CreateImm(Imm));
+    } else {
       // Build the constant shift specifier operand.
       unsigned bits2 = getShiftTypeBits(insn);
       unsigned imm5 = getShiftAmtBits(insn);
@@ -1421,7 +1511,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1448,8 +1539,15 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
       DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
       return false;
     }
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
-                                                       decodeRn(insn))));
+    int Idx;
+    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+      // The reg operand is tied to the first reg operand.
+      MI.addOperand(MI.getOperand(Idx));
+    } else {
+      // Add second reg operand.
+      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+                                                         decodeRn(insn))));
+    }
     ++OpIdx;
   }
 
@@ -1518,7 +1616,7 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
 // o t2ADDri12, t2SUBri12: Rs Rn imm12
 // o t2LEApcrel (ADR): Rs imm12
 // o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
-// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010)
+// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
 // o t2MOVi16: Rs imm16
 // o t2MOVTi16: Rs imm16
 // o t2SBFX (SBFX): Rs Rn lsb width
@@ -1579,9 +1677,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
   if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
       || Opcode == ARM::t2LEApcrel)
     MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
-  else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
-    MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
-  else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
+  else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
+    if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
+      MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
+  } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
     uint32_t mask = 0;
     if (getBitfieldInvMask(insn, mask))
       MI.addOperand(MCOperand::CreateImm(mask));
@@ -1625,8 +1724,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
 // A8.6.26
 // t2BXJ -> Rn
 //
-// Miscellaneous control: t2DMBsy (and its t2DMB variants),
-// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control:
 //   -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
 //
 // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1643,6 +1741,22 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
   if (NumOps == 0)
     return true;
 
+  if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
+    // Inst{3-0} encodes the memory barrier option for the variants.
+    unsigned opt = slice(insn, 3, 0);
+    switch (opt) {
+    case ARM_MB::SY:  case ARM_MB::ST:
+    case ARM_MB::ISH: case ARM_MB::ISHST:
+    case ARM_MB::NSH: case ARM_MB::NSHST:
+    case ARM_MB::OSH: case ARM_MB::OSHST:
+      MI.addOperand(MCOperand::CreateImm(opt));
+      NumOpsAdded = 1;
+      return true;
+    default:
+      return false;
+    }
+  }
+
   if (t2MiscCtrlInstr(insn))
     return true;
 
@@ -1719,6 +1833,17 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
     return true;
   }
 
+  // Some instructions have predicate operands first before the immediate.
+  if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
+    // Handling the two predicate operands before the imm operand.
+    if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+      NumOpsAdded += 2;
+    else {
+      DEBUG(errs() << "Expected predicate operands not found.\n");
+      return false;
+    }
+  }
+
   // Add the imm operand.
   int Offset = 0;
 
@@ -1739,13 +1864,12 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
     Offset = decodeImm32_BLX(insn);
     break;
   }
-  // When executing a Thumb instruction, PC reads as the address of the current
-  // instruction plus 4.  The assembler subtracts 4 from the difference between
-  // the branch instruction and the target address, disassembler has to add 4 to
-  // to compensate.
-  MI.addOperand(MCOperand::CreateImm(Offset + 4));
 
-  NumOpsAdded = 1;
+  if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
+    MI.addOperand(MCOperand::CreateImm(Offset));
+
+  // This is an increment as some predicate operands may have been added first.
+  NumOpsAdded += 1;
 
   return true;
 }
@@ -1787,7 +1911,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
                                                      decodeRn(insn))));
   ++OpIdx;
 
-  if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+  if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
   } else {
@@ -1795,17 +1919,17 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
            && !OpInfo[OpIdx].isOptionalDef()
            && "Pure imm operand expected");
     int Offset = 0;
-    if (slice(insn, 19, 16) == 0xFF) {
-      bool Negative = slice(insn, 23, 23) == 0;
-      unsigned Imm12 = getImm12(insn);
-      Offset = Negative ? -1 - Imm12 : 1 * Imm12;
-    } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
-               Opcode == ARM::t2PLIi8) {
+    if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
+        Opcode == ARM::t2PLIi8) {
       // A8.6.117 Encoding T2: add = FALSE
       unsigned Imm8 = getImm8(insn);
-      Offset = -1 - Imm8;
-    } else // The i12 forms.  See, for example, A8.6.117 Encoding T1.
+      Offset = -1 * Imm8;
+    } else {
+      // The i12 forms.  See, for example, A8.6.117 Encoding T1.
+      // Note that currently t2PLDi12 also handles the previously named t2PLDpci
+      // opcode, that's why we use decodeImm12(insn) which returns +/- imm12.
       Offset = decodeImm12(insn);
+    }
     MI.addOperand(MCOperand::CreateImm(Offset));
   }
   ++OpIdx;
@@ -1820,6 +1944,87 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
   return true;
 }
 
+static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
+      unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
+
+  // Inst{22-21} encodes the data item transferred for load/store.
+  // For single word, it is encoded as ob10.
+  bool Word = (slice(insn, 22, 21) == 2);
+  bool Half = (slice(insn, 22, 21) == 1);
+  bool Byte = (slice(insn, 22, 21) == 0);
+
+  if (UseRm && BadReg(R2)) {
+    DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
+    return true;
+  }
+
+  if (Load) {
+    if (!Word && R0 == 13) {
+      DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
+      return true;
+    }
+    if (Byte) {
+      if (WB && R0 == 15 && slice(insn, 10, 8) == 3)  {
+        // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
+        DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+        return true;
+      }
+    }
+    // A6.3.8 Load halfword, memory hints
+    if (Half) {
+      if (WB) {
+        if (R0 == R1)  {
+          // A8.6.82 LDRSH (immediate) Encoding T2
+          DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
+          return true;
+        }
+        if (R0 == 15 && slice(insn, 10, 8) == 3)  {
+          // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
+          DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+          return true;
+        }
+      } else {
+        if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
+          if (R0 == 15 && slice(insn, 10, 8) == 4) {
+            // A8.6.82 LDRSH (immediate) Encoding T2
+            DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
+                         << " \"Unallocated memory hints\"\n");
+            return true;
+          }
+        } else {
+          if (R0 == 15) {
+            // A8.6.82 LDRSH (immediate) Encoding T1
+            DEBUG(errs() << "if Rt == '1111' then SEE"
+                         << " \"Unallocated memory hints\"\n");
+            return true;
+          }
+        }
+      }
+    }
+  } else {
+    if (WB && R0 == R1) {
+      DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+      return true;
+    }
+    if ((WB && R0 == 15) || (!WB && R1 == 15)) {
+      DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
+      return true;
+    }
+    if (Word) {
+      if ((WB && R1 == 15) || (!WB && R0 == 15)) {
+        DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+        return true;
+      }
+    } else {
+      if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
+        DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 // A6.3.10 Store single data item
 // A6.3.9 Load byte, memory hints
 // A6.3.8 Load halfword, memory hints
@@ -1865,16 +2070,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
   OpIdx = 0;
 
   assert(NumOps >= 3 &&
-         OpInfo[0].RegClass == ARM::GPRRegClassID &&
-         OpInfo[1].RegClass == ARM::GPRRegClassID &&
+         OpInfo[0].RegClass > 0 &&
+         OpInfo[1].RegClass > 0 &&
          "Expect >= 3 operands and first two as reg operands");
 
-  bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID);
+  bool ThreeReg = (OpInfo[2].RegClass > 0);
   bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1;
   bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
 
   // Build the register operands, followed by the immediate.
-  unsigned R0, R1, R2 = 0;
+  unsigned R0 = 0, R1 = 0, R2 = 0;
   unsigned Rd = decodeRd(insn);
   int Imm = 0;
 
@@ -1905,19 +2110,24 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
       Imm = decodeImm8(insn);
   }
 
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
                                                      R0)));
   ++OpIdx;
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
                                                      R1)));
   ++OpIdx;
 
   if (ThreeReg) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+    // This could be an offset register or a TIED_TO register.
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
                                                        R2)));
     ++OpIdx;
   }
 
+  if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO,
+                        TIED_TO))
+    return false;
+
   assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
          && !OpInfo[OpIdx].isOptionalDef()
          && "Pure imm operand expected");
@@ -1947,25 +2157,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
   OpIdx = 0;
 
   assert(NumOps >= 2 &&
-         OpInfo[0].RegClass == ARM::rGPRRegClassID &&
-         OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+         OpInfo[0].RegClass > 0 &&
+         OpInfo[1].RegClass > 0 &&
          "Expect >= 2 operands and first two as reg operands");
 
   // Build the register operands, followed by the optional rotation amount.
 
-  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
+  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
 
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
                                                      decodeRs(insn))));
   ++OpIdx;
 
   if (ThreeReg) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
                                                        decodeRn(insn))));
     ++OpIdx;
   }
 
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
                                                      decodeRm(insn))));
   ++OpIdx;
 
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 1499da0..fc2aa75 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -29,6 +29,9 @@ StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
   return getInstructionName(Opcode);
 }
 
+StringRef ARMInstPrinter::getRegName(unsigned RegNo) const {
+  return getRegisterName(RegNo);
+}
 
 void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
   unsigned Opcode = MI->getOpcode();
@@ -133,9 +136,10 @@ static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
   unsigned Rot = ARM_AM::getSOImmValRot(V);
 
   // Print low-level immediate formation info, per
-  // A5.1.3: "Data-processing operands - Immediate".
+  // A5.2.3: Data-processing (immediate), and
+  // A5.2.4: Modified immediate constants in ARM instructions
   if (Rot) {
-    O << "#" << Imm << ", " << Rot;
+    O << "#" << Imm << ", #" << Rot;
     // Pretty printed version.
     if (CommentStream)
       *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
@@ -178,18 +182,16 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
   }
 }
 
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
 
-void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
-                                           raw_ostream &O) {
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+                                                raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(Op);
   const MCOperand &MO2 = MI->getOperand(Op+1);
   const MCOperand &MO3 = MI->getOperand(Op+2);
 
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-
   O << "[" << getRegisterName(MO1.getReg());
 
   if (!MO2.getReg()) {
@@ -212,6 +214,50 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
   O << "]";
 }
 
+void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
+                                         raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+
+  O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+  if (!MO2.getReg()) {
+    unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
+    O << '#'
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+      << ImmOffs;
+    return;
+  }
+
+  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+    << getRegisterName(MO2.getReg());
+
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+    O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+    << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, O);
+    return;
+  }
+
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+  unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+
+  if (IdxMode == ARMII::IndexModePost) {
+    printAM2PostIndexOp(MI, Op, O);
+    return;
+  }
+  printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
 void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
                                                  unsigned OpNum,
                                                  raw_ostream &O) {
@@ -235,11 +281,35 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
     << " #" << ShImm;
 }
 
-void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  const MCOperand &MO3 = MI->getOperand(OpNum+2);
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+                                         raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+
+  O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+  if (MO2.getReg()) {
+    O << (char)ARM_AM::getAM3Op(MO3.getImm())
+    << getRegisterName(MO2.getReg());
+    return;
+  }
+
+  unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+  O << '#'
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+    << ImmOffs;
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+                                                raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  const MCOperand &MO3 = MI->getOperand(Op+2);
 
   O << '[' << getRegisterName(MO1.getReg());
 
@@ -256,6 +326,18 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
   O << ']';
 }
 
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+                                           raw_ostream &O) {
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+  unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+  if (IdxMode == ARMII::IndexModePost) {
+    printAM3PostIndexOp(MI, Op, O);
+    return;
+  }
+  printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
 void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
                                                  unsigned OpNum,
                                                  raw_ostream &O) {
@@ -314,6 +396,12 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
   O << "]";
 }
 
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  O << "[" << getRegisterName(MO1.getReg()) << "]";
+}
+
 void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
                                                  unsigned OpNum,
                                                  raw_ostream &O) {
@@ -414,16 +502,6 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
   }
 }
 
-void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  O << '#';
-  if (Op.getImm() < 0)
-    O << '-' << (-Op.getImm() - 1);
-  else
-    O << Op.getImm();
-}
-
 void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 679d313..b3ac03a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -17,14 +17,18 @@
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
-  class MCOperand;
+
+class MCOperand;
+class TargetMachine;
 
 class ARMInstPrinter : public MCInstPrinter {
 public:
-  ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+  ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+    : MCInstPrinter(MAI) {}
 
   virtual void printInst(const MCInst *MI, raw_ostream &O);
   virtual StringRef getOpcodeName(unsigned Opcode) const;
+  virtual StringRef getRegName(unsigned RegNo) const;
 
   static const char *getInstructionName(unsigned Opcode);
 
@@ -38,15 +42,25 @@ public:
   void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
   void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
   void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O);
   void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
+
   void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O);
   void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
+
   void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
 
@@ -87,9 +101,7 @@ public:
   void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
                                       raw_ostream &O);
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 9a27e2f..f6d0242 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -15,11 +15,13 @@
 #define DEBUG_TYPE "mlx-expansion"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -49,15 +51,17 @@ namespace {
     const TargetRegisterInfo *TRI;
     MachineRegisterInfo *MRI;
 
+    bool isA9;
     unsigned MIIdx;
     MachineInstr* LastMIs[4];
+    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
 
     void clearStack();
     void pushStack(MachineInstr *MI);
     MachineInstr *getAccDefMI(MachineInstr *MI) const;
     unsigned getDefReg(MachineInstr *MI) const;
     bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
-    bool FindMLxHazard(MachineInstr *MI) const;
+    bool FindMLxHazard(MachineInstr *MI);
     void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
                                 unsigned MulOpc, unsigned AddSubOpc,
                                 bool NegAcc, bool HasLane);
@@ -146,7 +150,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
 }
 
 
-bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
   if (NumExpand >= ExpandLimit)
     return false;
 
@@ -154,7 +158,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
     return true;
 
   MachineInstr *DefMI = getAccDefMI(MI);
-  if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
     // r0 = vmla
     // r3 = vmla r0, r1, r2
     // takes 16 - 17 cycles
@@ -163,24 +167,33 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
     // r4 = vmul r1, r2
     // r3 = vadd r0, r4
     // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+    IgnoreStall.insert(DefMI);
     return true;
+  }
+
+  if (IgnoreStall.count(MI))
+    return false;
 
   // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
   // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
   // preserves the in-order retirement of the instructions.
   // Look at the next few instructions, if *most* of them can cause hazards,
   // then the scheduler can't *fix* this, we'd better break up the VMLA.
+  unsigned Limit1 = isA9 ? 1 : 4;
+  unsigned Limit2 = isA9 ? 1 : 4;
   for (unsigned i = 1; i <= 4; ++i) {
     int Idx = ((int)MIIdx - i + 4) % 4;
     MachineInstr *NextMI = LastMIs[Idx];
     if (!NextMI)
       continue;
 
-    if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
-      return true;
+    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+      if (i <= Limit1)
+        return true;
+    }
 
     // Look for VMLx RAW hazard.
-    if (hasRAWHazard(getDefReg(MI), NextMI))
+    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
       return true;
   }
 
@@ -248,6 +261,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
   bool Changed = false;
 
   clearStack();
+  IgnoreStall.clear();
 
   unsigned Skip = 0;
   MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
@@ -299,6 +313,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
   TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
   TRI = Fn.getTarget().getRegisterInfo();
   MRI = &Fn.getRegInfo();
+  const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+  isA9 = STI->isCortexA9();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 9fc3fb9..8ba9a27 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -657,3 +657,27 @@ Note that both "tst" and "moveq" are redundant.
 
 //===---------------------------------------------------------------------===//
 
+When loading immediate constants with movt/movw, if there are multiple
+constants needed with the same low 16 bits, and those values are not live at
+the same time, it would be possible to use a single movw instruction, followed
+by multiple movt instructions to rewrite the high bits to different values.
+For example:
+
+  volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4,
+  !tbaa
+!0
+  volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4,
+  !tbaa
+!0
+
+is compiled and optimized to:
+
+    movw    r0, #32796
+    mov.w    r1, #-1
+    movt    r0, #20480
+    str    r1, [r0]
+    movw    r0, #32796    @ <= this MOVW is not needed, value is there already
+    movt    r0, #20482
+    str    r1, [r0]
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 233e165..dee3d27 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -34,13 +34,14 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const
   return !MF.getFrameInfo()->hasVarSizedObjects();
 }
 
-static void emitSPUpdate(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator &MBBI,
-                         const TargetInstrInfo &TII, DebugLoc dl,
-                         const Thumb1RegisterInfo &MRI,
-                         int NumBytes) {
-  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
-                            MRI, dl);
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+             MachineBasicBlock::iterator &MBBI,
+             const TargetInstrInfo &TII, DebugLoc dl,
+             const Thumb1RegisterInfo &MRI,
+             int NumBytes, unsigned MIFlags = MachineInstr::NoFlags)  {
+  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, MIFlags);
 }
 
 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -70,11 +71,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   int FramePtrSpillFI = 0;
 
   if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+                 MachineInstr::FrameSetup);
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+                   MachineInstr::FrameSetup);
     return;
   }
 
@@ -131,7 +134,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   // Adjust FP so it point to the stack slot that contains the previous FP.
   if (hasFP(MF)) {
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
-      .addFrameIndex(FramePtrSpillFI).addImm(0);
+      .addFrameIndex(FramePtrSpillFI).addImm(0)
+      .setMIFlags(MachineInstr::FrameSetup);
     if (NumBytes > 7)
       // If offset is > 7 then sp cannot be adjusted in a single instruction,
       // try restoring from fp instead.
@@ -140,7 +144,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
 
   if (NumBytes)
     // Insert it after all the callee-save spills.
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+                 MachineInstr::FrameSetup);
 
   if (STI.isTargetELF() && hasFP(MF))
     MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@@ -156,7 +161,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   // to reference locals.
   if (RegInfo->hasBasePointer(MF))
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
-    
+
   // If the frame has variable sized objects then the epilogue must restore
   // the sp from fp. We can assume there's an FP here since hasFP already
   // checks for hasVarSizedObjects.
@@ -232,8 +237,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
       if (NumBytes) {
         assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
                "No scratch register to restore SP from FP!");
-        emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
-                                  TII, *RegInfo, dl);
+        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+                                  TII, *RegInfo);
         BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
           .addReg(ARM::R4);
       } else
@@ -307,6 +312,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
     MIB.addReg(Reg, getKillRegState(isKill));
   }
+  MIB.setMIFlags(MachineInstr::FrameSetup);
   return true;
 }
 
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index c592e12..bcfc516 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #ifndef __THUMB_FRAMEINFO_H_
-#define __THUMM_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
 
 #include "ARM.h"
 #include "ARMFrameLowering.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index f62a13e..33cefb6 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -31,8 +31,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -48,15 +46,29 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
   : ARMBaseRegisterInfo(tii, sti) {
 }
 
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+                                                                         const {
+  if (RC == ARM::tGPRRegisterClass || RC->hasSuperClass(ARM::tGPRRegisterClass))
+    return ARM::tGPRRegisterClass;
+  return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
+  return ARM::tGPRRegisterClass;
+}
+
 /// emitLoadConstPool - Emits a load from constpool to materialize the
 /// specified immediate.
-void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator &MBBI,
-                                           DebugLoc dl,
-                                           unsigned DestReg, unsigned SubIdx,
-                                           int Val,
-                                           ARMCC::CondCodes Pred,
-                                           unsigned PredReg) const {
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator &MBBI,
+                                      DebugLoc dl,
+                                      unsigned DestReg, unsigned SubIdx,
+                                      int Val,
+                                      ARMCC::CondCodes Pred, unsigned PredReg,
+                                      unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
@@ -64,8 +76,9 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
   unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
 
   BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
-          .addReg(DestReg, getDefRegState(true), SubIdx)
-          .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
+    .addReg(DestReg, getDefRegState(true), SubIdx)
+    .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+    .setMIFlags(MIFlags);
 }
 
 
@@ -76,11 +89,12 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
 static
 void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator &MBBI,
+                              DebugLoc dl,
                               unsigned DestReg, unsigned BaseReg,
                               int NumBytes, bool CanChangeCC,
                               const TargetInstrInfo &TII,
                               const ARMBaseRegisterInfo& MRI,
-                              DebugLoc dl) {
+                              unsigned MIFlags = MachineInstr::NoFlags) {
     MachineFunction &MF = *MBB.getParent();
     bool isHigh = !isARMLowRegister(DestReg) ||
                   (BaseReg != 0 && !isARMLowRegister(BaseReg));
@@ -101,14 +115,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
 
     if (NumBytes <= 255 && NumBytes >= 0)
       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
-        .addImm(NumBytes);
+        .addImm(NumBytes).setMIFlags(MIFlags);
     else if (NumBytes < 0 && NumBytes >= -255) {
       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
-        .addImm(NumBytes);
+        .addImm(NumBytes).setMIFlags(MIFlags);
       AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
-        .addReg(LdReg, RegState::Kill);
+        .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
     } else
-      MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes);
+      MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+                            ARMCC::AL, 0, MIFlags);
 
     // Emit add / sub.
     int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
@@ -151,10 +166,11 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
 /// a destreg = basereg + immediate in Thumb code.
 void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator &MBBI,
+                                     DebugLoc dl,
                                      unsigned DestReg, unsigned BaseReg,
                                      int NumBytes, const TargetInstrInfo &TII,
                                      const ARMBaseRegisterInfo& MRI,
-                                     DebugLoc dl) {
+                                     unsigned MIFlags) {
   bool isSub = NumBytes < 0;
   unsigned Bytes = (unsigned)NumBytes;
   if (isSub) Bytes = -NumBytes;
@@ -211,8 +227,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
   if (NumMIs > Threshold) {
     // This will expand into too many instructions. Load the immediate from a
     // constpool entry.
-    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
-                             MRI, dl);
+    emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+                             DestReg, BaseReg, NumBytes, true,
+                             TII, MRI, MIFlags);
     return;
   }
 
@@ -224,11 +241,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
       Bytes -= ThisVal;
       const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
       const MachineInstrBuilder MIB =
-        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg));
+        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags));
       AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
     } else {
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
-        .addReg(BaseReg, RegState::Kill);
+        .addReg(BaseReg, RegState::Kill)
+        .setMIFlags(MIFlags);
     }
     BaseReg = DestReg;
   }
@@ -243,9 +261,10 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
       if (NeedCC)
         MIB = AddDefaultT1CC(MIB);
-      MIB .addReg(DestReg).addImm(ThisVal);
+      MIB.addReg(DestReg).addImm(ThisVal);
       if (NeedPred)
         MIB = AddDefaultPred(MIB);
+      MIB.setMIFlags(MIFlags);
     }
     else {
       bool isKill = BaseReg != ARM::SP;
@@ -255,8 +274,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
       MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
       if (NeedPred)
         MIB = AddDefaultPred(MIB);
-      BaseReg = DestReg;
+      MIB.setMIFlags(MIFlags);
 
+      BaseReg = DestReg;
       if (Opc == ARM::tADDrSPi) {
         // r4 = add sp, imm
         // r4 = add r4, imm
@@ -274,7 +294,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
     const TargetInstrDesc &TID = TII.get(ExtraOpc);
     AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
                    .addReg(DestReg, RegState::Kill)
-                   .addImm(((unsigned)NumBytes) & 3));
+                   .addImm(((unsigned)NumBytes) & 3)
+                   .setMIFlags(MIFlags));
   }
 }
 
@@ -283,8 +304,8 @@ static void emitSPUpdate(MachineBasicBlock &MBB,
                          const TargetInstrInfo &TII, DebugLoc dl,
                          const Thumb1RegisterInfo &MRI,
                          int NumBytes) {
-  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
-                            MRI, dl);
+  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI);
 }
 
 void Thumb1RegisterInfo::
@@ -337,7 +358,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
                                         DestReg))
                  .addImm(ThisVal));
   if (Imm > 0)
-    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+    emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
   if (isSub) {
     const TargetInstrDesc &TID = TII.get(ARM::tRSB);
     AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
@@ -430,8 +451,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
     // MI would expand into a large number of instructions. Don't try to
     // simplify the immediate.
     if (NumMIs > 2) {
-      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+                                *this);
       MBB.erase(II);
       return true;
     }
@@ -450,8 +471,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
       }
       Offset = (Offset - Mask * Scale);
       MachineBasicBlock::iterator NII = llvm::next(II);
-      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+                                *this);
     } else {
       // Translate r0 = add sp, -imm to
       // r0 = -imm (this is then translated into a series of instructons)
@@ -645,15 +666,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     bool UseRR = false;
     if (Opcode == ARM::tRestore) {
       if (FrameReg == ARM::SP)
-        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                 Offset, false, TII, *this, dl);
+        emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+                                 Offset, false, TII, *this);
       else {
         emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
         UseRR = true;
       }
     } else {
-      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+                                *this);
     }
 
     MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
@@ -668,15 +689,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
       if (Opcode == ARM::tSpill) {
         if (FrameReg == ARM::SP)
-          emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg,
-                                   Offset, false, TII, *this, dl);
+          emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+                                   Offset, false, TII, *this);
         else {
           emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
           UseRR = true;
         }
       } else
-        emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
-                                  *this, dl);
+        emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+                                  *this);
       MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
       MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
       if (UseRR)
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 8a87cc5..9060e59 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -28,6 +28,11 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
   Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
 
+  const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+  const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
  void emitLoadConstPool(MachineBasicBlock &MBB,
@@ -35,7 +40,8 @@ public:
                         DebugLoc dl,
                         unsigned DestReg, unsigned SubIdx, int Val,
                         ARMCC::CondCodes Pred = ARMCC::AL,
-                        unsigned PredReg = 0) const;
+                        unsigned PredReg = 0,
+                        unsigned MIFlags = MachineInstr::NoFlags) const;
 
   /// Code Generation virtual methods...
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 9b1073b..d169dbb 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -184,7 +184,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                unsigned DestReg, unsigned BaseReg, int NumBytes,
                                ARMCC::CondCodes Pred, unsigned PredReg,
-                               const ARMBaseInstrInfo &TII) {
+                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
   bool isSub = NumBytes < 0;
   if (isSub) NumBytes = -NumBytes;
 
@@ -198,14 +198,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
       // Use a movw to materialize the 16-bit constant.
       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
         .addImm(NumBytes)
-        .addImm((unsigned)Pred).addReg(PredReg);
+        .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
       Fits = true;
     } else if ((NumBytes & 0xffff) == 0) {
       // Use a movt to materialize the 32-bit constant.
       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
         .addReg(DestReg)
         .addImm(NumBytes >> 16)
-        .addImm((unsigned)Pred).addReg(PredReg);
+        .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
       Fits = true;
     }
 
@@ -214,12 +214,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
         BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
           .addReg(BaseReg, RegState::Kill)
           .addReg(DestReg, RegState::Kill)
-          .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+          .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+          .setMIFlags(MIFlags);
       } else {
         BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
           .addReg(DestReg, RegState::Kill)
           .addReg(BaseReg, RegState::Kill)
-        .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+          .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+          .setMIFlags(MIFlags);
       }
       return;
     }
@@ -230,7 +232,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     unsigned Opc = 0;
     if (DestReg == ARM::SP && BaseReg != ARM::SP) {
       // mov sp, rn. Note t2MOVr cannot be used.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg)
+        .addReg(BaseReg).setMIFlags(MIFlags);
       BaseReg = ARM::SP;
       continue;
     }
@@ -243,7 +246,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
         Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
         // FIXME: Fix Thumb1 immediate encoding.
         BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
-          .addReg(BaseReg).addImm(ThisVal/4);
+          .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
         NumBytes = 0;
         continue;
       }
@@ -283,7 +286,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     MachineInstrBuilder MIB =
       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
                      .addReg(BaseReg, RegState::Kill)
-                     .addImm(ThisVal));
+                     .addImm(ThisVal)).setMIFlags(MIFlags);
     if (HasCCOut)
       AddDefaultCC(MIB);
 
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 099b8f7..355c3bf 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -13,26 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "Thumb2InstrInfo.h"
 #include "Thumb2RegisterInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
 Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
@@ -42,13 +31,14 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
 
 /// emitLoadConstPool - Emits a load from constpool to materialize the
 /// specified immediate.
-void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator &MBBI,
-                                           DebugLoc dl,
-                                           unsigned DestReg, unsigned SubIdx,
-                                           int Val,
-                                           ARMCC::CondCodes Pred,
-                                           unsigned PredReg) const {
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator &MBBI,
+                                      DebugLoc dl,
+                                      unsigned DestReg, unsigned SubIdx,
+                                      int Val,
+                                      ARMCC::CondCodes Pred, unsigned PredReg,
+                                      unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
@@ -57,5 +47,6 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
 
   BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
     .addReg(DestReg, getDefRegState(true), SubIdx)
-    .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
+    .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+    .setMIFlags(MIFlags);
 }
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index b3cf2e5..824378a 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -35,7 +35,8 @@ public:
                          DebugLoc dl,
                          unsigned DestReg, unsigned SubIdx, int Val,
                          ARMCC::CondCodes Pred = ARMCC::AL,
-                         unsigned PredReg = 0) const;
+                         unsigned PredReg = 0,
+                         unsigned MIFlags = MachineInstr::NoFlags) const;
 };
 }
 
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index cc8f61c..ce2e966 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -12,6 +12,7 @@
 #include "ARMAddressingModes.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -49,82 +50,86 @@ namespace {
                            // 1 - No cc field.
                            // 2 - Always set CPSR.
     unsigned PredCC2  : 2;
+    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
     unsigned Special  : 1; // Needs to be dealt with specially
   };
 
   static const ReduceEntry ReduceTable[] = {
-    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
-    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
-    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
+    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
+    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
+    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,0 },
+    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
     // Note: immediate scale is 4.
-    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 1 },
-    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 1 },
-    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 1 },
-    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
+    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0,1 },
+    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
+    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
+    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0 },
     //FIXME: Disable CMN, as CCodes are backwards from compare expectations
-    //{ ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 1 },
-    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
+    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1 },
+    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0 },
     // FIXME: adr.n immediate offset must be multiple of 4.
-    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,     0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
-    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1 },
+    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0 },
+    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+    // likely to cause issue in the loop. As a size / performance workaround,
+    // they are not marked as such.
+    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
     // FIXME: Do we need the 16-bit 'S' variant?
-    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
-    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
-    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0 },
-    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
-    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 1 },
-    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
-    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
-    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0 },
-    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 },
+    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0,0 },
+    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0,0 },
+    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0,0 },
+    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1 },
+    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0 },
+    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0 },
+    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0 },
+    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
 
     // FIXME: Clean this up after splitting each Thumb load / store opcode
     // into multiple ones.
-    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 1 },
-
-    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+
+    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1 },
     // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
-    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1 },
   };
 
   class Thumb2SizeReduce : public MachineFunctionPass {
@@ -133,6 +138,7 @@ namespace {
     Thumb2SizeReduce();
 
     const Thumb2InstrInfo *TII;
+    const ARMSubtarget *STI;
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -144,6 +150,8 @@ namespace {
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 
+    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
                          bool LiveCPSR, bool &HasCC, bool &CCDead);
@@ -152,19 +160,20 @@ namespace {
                          const ReduceEntry &Entry);
 
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry, bool LiveCPSR);
+                       const ReduceEntry &Entry, bool LiveCPSR,
+                       MachineInstr *CPSRDef);
 
     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
-                       bool LiveCPSR);
+                       bool LiveCPSR, MachineInstr *CPSRDef);
 
     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry,
-                        bool LiveCPSR);
+                        bool LiveCPSR, MachineInstr *CPSRDef);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
@@ -187,6 +196,52 @@ static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
   return false;
 }
 
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 's' 16-bit instruction partially update CPSR. Abort the
+/// transformation to avoid adding false dependency on last CPSR setting
+/// instruction which hurts the ability for out-of-order execution engine
+/// to do register renaming magic.
+/// This function checks if there is a read-of-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If there
+/// is, then there is no harm done since the instruction cannot be retired
+/// before the CPSR setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of compile
+/// time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+///    = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls since there
+/// are indirect RAW dependency between the muls and the mul.w
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+  if (!Def || !STI->avoidCPSRPartialUpdate())
+    return false;
+
+  SmallSet<unsigned, 2> Defs;
+  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Def->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == ARM::CPSR)
+      continue;
+    Defs.insert(Reg);
+  }
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Use->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Defs.count(Reg))
+      return false;
+  }
+
+  // No read-after-write dependency. The narrowing will add false dependency.
+  return true;
+}
+
 bool
 Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                   bool is2Addr, ARMCC::CondCodes Pred,
@@ -410,7 +465,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     MIB.addOperand(MI->getOperand(OpNum));
 
   // Transfer memoperands.
-  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
 
   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
 
@@ -422,7 +480,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR) {
+                                bool LiveCPSR, MachineInstr *CPSRDef) {
   if (Entry.LowRegs1 && !VerifyLowRegs(MI))
     return false;
 
@@ -440,12 +498,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
       switch (Opc) {
       default: break;
       case ARM::t2ADDSri: {
-        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
+        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
           return true;
         // fallthrough
       }
       case ARM::t2ADDSrr:
-        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
       }
     }
     break;
@@ -453,13 +511,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   case ARM::t2RSBri:
   case ARM::t2RSBSri:
     if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
     break;
   case ARM::t2MOVi16:
     // Can convert only 'pure' immediate operands, not immediates obtained as
     // globals' addresses.
     if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
     break;
   case ARM::t2CMPrr: {
     // Try to reduce to the lo-reg only version first. Why there are two
@@ -468,17 +526,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     // are prioritized, but the table assumes a unique entry for each
     // source insn opcode. So for now, we hack a local entry record to use.
     static const ReduceEntry NarrowEntry =
-      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
       return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
   }
   case ARM::t2ADDrSPi: {
     static const ReduceEntry NarrowEntry =
-      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 };
     if (MI->getOperand(0).getReg() == ARM::SP)
-      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
   }
   }
   return false;
@@ -487,7 +545,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR) {
+                                bool LiveCPSR, MachineInstr *CPSRDef) {
 
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
@@ -542,6 +600,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
     return false;
 
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which has the 's' bit set.
+  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+      canAddPseudoFlagDep(CPSRDef, MI))
+    return false;
+
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -563,6 +627,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     MIB.addOperand(MI->getOperand(i));
   }
 
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
+
   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
 
   MBB.erase(MI);
@@ -573,7 +640,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
-                                 bool LiveCPSR) {
+                                 bool LiveCPSR, MachineInstr *CPSRDef) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;
 
@@ -626,6 +693,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
     return false;
 
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which has the 's' bit set.
+  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+      canAddPseudoFlagDep(CPSRDef, MI))
+    return false;
+
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -663,6 +736,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!TID.isPredicable() && NewTID.isPredicable())
     AddDefaultPred(MIB);
 
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
+
   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
 
   MBB.erase(MI);
@@ -670,7 +746,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   return true;
 }
 
-static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
   bool HasDef = false;
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
@@ -678,6 +754,8 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
       continue;
     if (MO.getReg() != ARM::CPSR)
       continue;
+
+    DefCPSR = true;
     if (!MO.isDead())
       HasDef = true;
   }
@@ -707,6 +785,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
   // Yes, CPSR could be livein.
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+  MachineInstr *CPSRDef = 0;
 
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
@@ -722,7 +801,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       const ReduceEntry &Entry = ReduceTable[OPI->second];
       // Ignore "special" cases for now.
       if (Entry.Special) {
-        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
           Modified = true;
           MachineBasicBlock::iterator I = prior(NextMII);
           MI = &*I;
@@ -731,7 +810,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       }
 
       // Try to transform to a 16-bit two-address instruction.
-      if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+      if (Entry.NarrowOpc2 &&
+          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -739,7 +819,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       }
 
       // Try to transform to a 16-bit non-two-address instruction.
-      if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+      if (Entry.NarrowOpc1 &&
+          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -747,7 +828,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
     }
 
   ProcessNext:
-    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+    bool DefCPSR = false;
+    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+    if (MI->getDesc().isCall())
+      // Calls don't really set CPSR.
+      CPSRDef = 0;
+    else if (DefCPSR)
+      // This is the last CPSR defining instruction.
+      CPSRDef = MI;
   }
 
   return Modified;
@@ -756,6 +844,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   const TargetMachine &TM = MF.getTarget();
   TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+  STI = &TM.getSubtarget<ARMSubtarget>();
 
   bool Modified = false;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
index 4508eda..ae79c2e 100644
--- a/lib/Target/Alpha/Alpha.td
+++ b/lib/Target/Alpha/Alpha.td
@@ -21,7 +21,7 @@ include "llvm/Target/Target.td"
 //===----------------------------------------------------------------------===//
 
 def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
-                                  "Enable CIX extentions">;
+                                  "Enable CIX extensions">;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index c4f43ab..ee404f0 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -296,7 +296,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token chain and
   // flag operands which copy the outgoing args into registers.  The InFlag in
-  // necessary since all emited instructions must be stuck together.
+  // necessary since all emitted instructions must be stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 099d715..b201712 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -1030,7 +1030,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
 //WMB Mfc 18.4400 Write memory barrier
 //MF_FPCR F-P 17.025 Move from FPCR
 //MT_FPCR F-P 17.024 Move to FPCR
-//There are in the Multimedia extentions, so let's not use them yet
+//There are in the Multimedia extensions, so let's not use them yet
 //def MAXSB8  : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
 //def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
 //def MAXUB8  : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
index 9ae1517..cc170e3 100644
--- a/lib/Target/Alpha/README.txt
+++ b/lib/Target/Alpha/README.txt
@@ -33,9 +33,9 @@ add crazy vector instructions (MVI):
 (MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
 PKWB, UNPKBW pack/unpack word to byte
 PKLB UNPKBL pack/unpack long to byte
-PERR pixel error (sum accross bytes of bytewise abs(i8v8 a - i8v8 b))
+PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
 
-cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extentions)
+cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
 
 this has some good examples for other operations that can be synthesised well 
 from these rather meager vector ops (such as saturating add).
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 7c80eec..1e1f8c9 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -345,7 +345,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag in necessary since all emitted instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 6c555a3..358d1b3 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2440,24 +2440,6 @@ void CWriter::visitReturnInst(ReturnInst &I) {
     return;
   }
 
-  if (I.getNumOperands() > 1) {
-    Out << "  {\n";
-    Out << "    ";
-    printType(Out, I.getParent()->getParent()->getReturnType());
-    Out << "   llvm_cbe_mrv_temp = {\n";
-    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
-      Out << "      ";
-      writeOperand(I.getOperand(i));
-      if (i != e - 1)
-        Out << ",";
-      Out << "\n";
-    }
-    Out << "    };\n";
-    Out << "    return llvm_cbe_mrv_temp;\n";
-    Out << "  }\n";
-    return;
-  }
-
   Out << "  return";
   if (I.getNumOperands()) {
     Out << ' ';
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 5ef5716..f340edf 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -24,7 +24,7 @@
 // 5. The code sequences for r64 and v2i64 are probably overly conservative,
 //    compared to the code that gcc produces.
 //
-// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 // selb instruction definition for i64. Note that the selection mask is
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 4040461..fd96694 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -182,6 +182,10 @@ namespace {
       printOp(MI->getOperand(OpNo), O);
     }
 
+    void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      printOp(MI->getOperand(OpNo), O);
+    }
+
     void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
       // Used to generate a ".-<target>", but it turns out that the assembler
       // really wants the target.
@@ -279,6 +283,9 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
     }
     O << *Mang->getSymbol(MO.getGlobal());
     return;
+  case MachineOperand::MO_MCSymbol:
+    O << *(MO.getMCSymbol());
+    return;
   default:
     O << "<unknown operand type: " << MO.getType() << ">";
     return;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index d226156..9351ffd 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -321,12 +321,17 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
   // These match the addr256k operand type:
   EVT OffsVT = MVT::i16;
   SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+  int64_t val;
 
   switch (N.getOpcode()) {
   case ISD::Constant:
+    val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
+    Base = CurDAG->getTargetConstant( val , MVT::i32);
+    Index = Zero;
+    return true; break;
   case ISD::ConstantPool:
   case ISD::GlobalAddress:
-    report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered.");
+    report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
     /*NOTREACHED*/
 
   case ISD::TargetConstant:
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 743a4d7..8668da3 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -705,7 +705,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
                                                  offset
                                                 ));
 
-    // Shift the low similarily
+    // Shift the low similarly
     // TODO: add SPUISD::SHL_BYTES
     low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
 
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index dd48d7b..cf883e2 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -183,14 +183,6 @@ namespace llvm {
 
     virtual bool isLegalAddressingMode(const AddrMode &AM,
                                        const Type *Ty) const;
-
-    /// After allocating this many registers, the allocator should feel
-    /// register pressure. The value is a somewhat random guess, based on the
-    /// number of non callee saved registers in the C calling convention.
-    virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
-                                          MachineFunction &MF) const{
-      return 50;
-    }
   };
 }
 
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
index 21bc275..bdbe255 100644
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -296,3 +296,25 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
   let Pattern = pattern;
   let Inst{31-0} = 0;
 }
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+        : Instruction {
+  field bits<32> Inst;
+  bits<16>i16;
+  bits<9>RO;
+
+  let Namespace = "SPU";
+  let InOperandList = IOL;
+  let OutOperandList = (outs); //no output
+  let AsmString = asmstr;
+  let Itinerary = BranchHints;
+
+  let Inst{0-6} = opcode;
+  let Inst{7-8} = RO{8-7};
+  let Inst{9-24} = i16;
+  let Inst{25-31} = RO{6-0};
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index f9e6c72..080434d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"
 
 using namespace llvm;
 
@@ -281,9 +282,20 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
   return true;
 }
 
+// search MBB for branch hint labels and branch hit ops
+static void removeHBR( MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){
+    if (I->getOpcode() == SPU::HBRA ||
+        I->getOpcode() == SPU::HBR_LABEL){
+      I=MBB.erase(I);
+    }
+  }
+}
+
 unsigned
 SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
+  removeHBR(MBB);
   if (I == MBB.begin())
     return 0;
   --I;
@@ -314,6 +326,23 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ *   -the branch hint delays
+ *   -congestion of the memory bus
+ *   -dual-issue scheduling (i.e. avoid insertion of nops)
+ * Current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+   MachineBasicBlock::iterator J = MBB.end();
+	for( int i=0; i<8; i++) {
+		if( J == MBB.begin() ) return J;
+		J--;
+	}
+	return J;
+}
+
 unsigned
 SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
@@ -324,32 +353,61 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "SPU branch conditions have two components!");
 
+  MachineInstrBuilder MIB;
+  //TODO: make a more accurate algorithm.
+  bool haveHBR = MBB.size()>8;
+  
+  removeHBR(MBB);
+  MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+  // Add a label just before the branch
+  if (haveHBR)
+    MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
   // One-way branch.
   if (FBB == 0) {
     if (Cond.empty()) {
       // Unconditional branch
-      MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR));
+      MIB = BuildMI(&MBB, DL, get(SPU::BR));
       MIB.addMBB(TBB);
 
       DEBUG(errs() << "Inserted one-way uncond branch: ");
       DEBUG((*MIB).dump());
+
+      // basic blocks have just one branch so it is safe to add the hint a its
+      if (haveHBR) {
+        MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+        MIB.addSym(branchLabel);
+        MIB.addMBB(TBB);
+      }	
     } else {
       // Conditional branch
-      MachineInstrBuilder  MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+      MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
       MIB.addReg(Cond[1].getReg()).addMBB(TBB);
 
+      if (haveHBR) {
+        MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+        MIB.addSym(branchLabel);
+        MIB.addMBB(TBB);
+      }	
+
       DEBUG(errs() << "Inserted one-way cond branch:   ");
       DEBUG((*MIB).dump());
     }
     return 1;
   } else {
-    MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+    MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
     MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
 
     // Two-way Conditional Branch.
     MIB.addReg(Cond[1].getReg()).addMBB(TBB);
     MIB2.addMBB(FBB);
 
+    if (haveHBR) {
+      MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+      MIB.addSym(branchLabel);
+      MIB.addMBB(FBB);
+    }	
+
     DEBUG(errs() << "Inserted conditional branch:    ");
     DEBUG((*MIB).dump());
     DEBUG(errs() << "part 2: ");
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 25f6fd0..e103c9b 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -28,6 +28,8 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
   def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm_i32:$amt),
                                 "${:comment} ADJCALLSTACKUP",
                                 [(callseq_end timm:$amt)]>;
+  def HBR_LABEL        : Pseudo<(outs), (ins hbrtarget:$targ), 
+                                "$targ:\t${:comment}branch hint target",[ ]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2013,9 +2015,9 @@ class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
            RotShiftVec, pattern>;
 
 class SHLHVecInst<ValueType vectype>:
-    SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+    SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
              [(set (vectype VECREG:$rT),
-                   (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
+                   (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
 
 multiclass ShiftLeftHalfword
 {
@@ -2063,9 +2065,9 @@ class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
 multiclass ShiftLeftWord
 {
   def v4i32:
-      SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+      SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
               [(set (v4i32 VECREG:$rT),
-                    (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>;
+                    (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
   def r32:
       SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
               [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
@@ -2511,19 +2513,11 @@ class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
            RotShiftVec, pattern>;
 
 def ROTHMv8i16:
-    ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+    ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
               [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB),
-          (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB),
-          (ROTHMv8i16 VECREG:$rA,
-                      (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB),
-          (ROTHMv8i16 VECREG:$rA,
-                      (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+          (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
 
 // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
 // Note: This instruction doesn't match a pattern because rB must be negated
@@ -2584,19 +2578,11 @@ class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
            RotShiftVec, pattern>;
 
 def ROTMv4i32:
-    ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+    ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
              [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB),
-          (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB),
-          (ROTMv4i32 VECREG:$rA,
-                     (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB),
-          (ROTMv4i32 VECREG:$rA,
-                     (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+          (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
 
 def ROTMr32:
     ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
@@ -2802,20 +2788,12 @@ defm ROTQMBII: RotateMaskQuadByBitsImm;
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 def ROTMAHv8i16:
-    RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+    RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
       "rotmah\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
-          (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB),
-          (ROTMAHv8i16 VECREG:$rA,
-                       (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
-          (ROTMAHv8i16 VECREG:$rA,
-                       (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+          (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
 
 def ROTMAHr16:
     RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
@@ -2857,20 +2835,12 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)),
           (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
 
 def ROTMAv4i32:
-    RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+    RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
       "rotma\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
-          (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB),
-          (ROTMAv4i32 VECREG:$rA,
-                      (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
-          (ROTMAv4i32 VECREG:$rA,
-                      (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+          (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
 
 def ROTMAr32:
     RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
@@ -4208,8 +4178,8 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
 //===----------------------------------------------------------------------===//
 // Hint for branch instructions:
 //===----------------------------------------------------------------------===//
-
-/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */
+def HBRA :
+    HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
 
 //===----------------------------------------------------------------------===//
 // Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 641da04..1708c59 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -46,6 +46,14 @@ namespace llvm {
     virtual const TargetRegisterClass *
     getPointerRegClass(unsigned Kind = 0) const;
 
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non callee saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+                                          MachineFunction &MF) const{
+      return 50;
+    }
+
     //! Return the array of callee-saved registers
     virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
 
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 71d6049..797cfd5 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1348,12 +1348,10 @@ void CppWriter::printInstruction(const Instruction *I,
     const PHINode* phi = cast<PHINode>(I);
 
     Out << "PHINode* " << iName << " = PHINode::Create("
-        << getCppName(phi->getType()) << ", \"";
+        << getCppName(phi->getType()) << ", "
+        << phi->getNumIncomingValues() << ", \"";
     printEscapedString(phi->getName());
     Out << "\", " << bbname << ");";
-    nl(Out) << iName << "->reserveOperandSpace("
-      << phi->getNumIncomingValues()
-        << ");";
     nl(Out);
     for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
       Out << iName << "->addIncoming("
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 3379ac2..060a87b 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -57,18 +57,26 @@ static unsigned mblazeBinary2Opcode[] = {
 };
 
 static unsigned getRD(uint32_t insn) {
+  if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+    return UNSUPPORTED;
   return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
 }
 
 static unsigned getRA(uint32_t insn) {
+  if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F))
+    return UNSUPPORTED;
   return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
 }
 
 static unsigned getRB(uint32_t insn) {
+  if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F))
+    return UNSUPPORTED;
   return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
 }
 
 static int64_t getRS(uint32_t insn) {
+  if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+    return UNSUPPORTED;
   return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
 }
 
@@ -489,13 +497,14 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
                                         raw_ostream &vStream) const {
   // The machine instruction.
   uint32_t insn;
+  uint64_t read;
   uint8_t bytes[4];
 
-  // We always consume 4 bytes of data
-  size = 4;
+  // By default we consume 1 byte on failure
+  size = 1;
 
   // We want to read exactly 4 bytes of data.
-  if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1)
+  if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
     return false;
 
   // Encoded as a big-endian 32-bit word in the stream.
@@ -509,44 +518,63 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
 
   instr.setOpcode(opcode);
 
+  unsigned RD = getRD(insn);
+  unsigned RA = getRA(insn);
+  unsigned RB = getRB(insn);
+  unsigned RS = getRS(insn);
+
   uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
   switch ((tsFlags & MBlazeII::FormMask)) {
-  default: llvm_unreachable("unknown instruction encoding");
+  default: 
+    return false;
 
   case MBlazeII::FRRRR:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RB));
+    instr.addOperand(MCOperand::CreateReg(RA));
     break;
 
   case MBlazeII::FRRR:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RA));
+    instr.addOperand(MCOperand::CreateReg(RB));
     break;
 
   case MBlazeII::FRI:
     switch (opcode) {
-    default: llvm_unreachable("unknown instruction encoding");
+    default: 
+      return false;
     case MBlaze::MFS:
-      instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+      if (RD == UNSUPPORTED)
+        return false;
+      instr.addOperand(MCOperand::CreateReg(RD));
       instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
       break;
     case MBlaze::MTS:
+      if (RA == UNSUPPORTED)
+        return false;
       instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
-      instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+      instr.addOperand(MCOperand::CreateReg(RA));
       break;
     case MBlaze::MSRSET:
     case MBlaze::MSRCLR:
-      instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+      if (RD == UNSUPPORTED)
+        return false;
+      instr.addOperand(MCOperand::CreateReg(RD));
       instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
       break;
     }
     break;
 
   case MBlazeII::FRRI:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RA));
     switch (opcode) {
     default:
       instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
@@ -560,27 +588,37 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
     break;
 
   case MBlazeII::FCRR:
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RA));
+    instr.addOperand(MCOperand::CreateReg(RB));
     break;
 
   case MBlazeII::FCRI:
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RA));
     instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
     break;
 
   case MBlazeII::FRCR:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RB));
     break;
 
   case MBlazeII::FRCI:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    if (RD == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
     instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
     break;
 
   case MBlazeII::FCCR:
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    if (RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RB));
     break;
 
   case MBlazeII::FCCI:
@@ -588,33 +626,45 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
     break;
 
   case MBlazeII::FRRCI:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RA));
     instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
     break;
 
   case MBlazeII::FRRC:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RA));
     break;
 
   case MBlazeII::FRCX:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    if (RD == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
     instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
     break;
 
   case MBlazeII::FRCS:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+    if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
+    instr.addOperand(MCOperand::CreateReg(RS));
     break;
 
   case MBlazeII::FCRCS:
-    instr.addOperand(MCOperand::CreateReg(getRS(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RS));
+    instr.addOperand(MCOperand::CreateReg(RA));
     break;
 
   case MBlazeII::FCRCX:
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    if (RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RA));
     instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
     break;
 
@@ -623,16 +673,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
     break;
 
   case MBlazeII::FCR:
-    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    if (RB == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RB));
     break;
 
   case MBlazeII::FRIR:
-    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+      return false;
+    instr.addOperand(MCOperand::CreateReg(RD));
     instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
-    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateReg(RA));
     break;
   }
 
+  // We always consume 4 bytes of data on success
+  size = 4;
+
   return true;
 }
 
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index bebc6c8..13c4b49 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -18,11 +18,12 @@
 
 namespace llvm {
   class MCOperand;
+  class TargetMachine;
 
   class MBlazeInstPrinter : public MCInstPrinter {
   public:
-    MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
-    }
+    MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+      : MCInstPrinter(MAI) {}
 
     virtual void printInst(const MCInst *MI, raw_ostream &O);
 
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 1fa1e4d..1245658 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -31,49 +31,28 @@ def MBlazeInstrInfo : InstrInfo;
 // Microblaze Subtarget features                                              //
 //===----------------------------------------------------------------------===//
 
-def FeaturePipe3       : SubtargetFeature<"pipe3", "HasPipe3", "true",
-                                "Implements 3-stage pipeline">;
 def FeatureBarrel      : SubtargetFeature<"barrel", "HasBarrel", "true",
                                 "Implements barrel shifter">;
 def FeatureDiv         : SubtargetFeature<"div", "HasDiv", "true",
                                 "Implements hardware divider">;
 def FeatureMul         : SubtargetFeature<"mul", "HasMul", "true",
                                 "Implements hardware multiplier">;
-def FeatureFSL         : SubtargetFeature<"fsl", "HasFSL", "true",
-                                "Implements FSL instructions">;
-def FeatureEFSL        : SubtargetFeature<"efsl", "HasEFSL", "true",
-                                "Implements extended FSL instructions">;
-def FeatureMSRSet      : SubtargetFeature<"msrset", "HasMSRSet", "true",
-                                "Implements MSR register set and clear">;
-def FeatureException   : SubtargetFeature<"exception", "HasException", "true",
-                                "Implements hardware exception support">;
 def FeaturePatCmp      : SubtargetFeature<"patcmp", "HasPatCmp", "true",
                                 "Implements pattern compare instruction">;
 def FeatureFPU         : SubtargetFeature<"fpu", "HasFPU", "true",
                                 "Implements floating point unit">;
-def FeatureESR         : SubtargetFeature<"esr", "HasESR", "true",
-                                "Implements ESR and EAR registers">;
-def FeaturePVR         : SubtargetFeature<"pvr", "HasPVR", "true",
-                                "Implements processor version register">;
 def FeatureMul64       : SubtargetFeature<"mul64", "HasMul64", "true",
                                 "Implements multiplier with 64-bit result">;
 def FeatureSqrt        : SubtargetFeature<"sqrt", "HasSqrt", "true",
                                 "Implements sqrt and floating point convert">;
-def FeatureMMU         : SubtargetFeature<"mmu", "HasMMU", "true",
-                                "Implements memory management unit">;
 
 //===----------------------------------------------------------------------===//
 // MBlaze processors supported.
 //===----------------------------------------------------------------------===//
 
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, MBlazeGenericItineraries, Features>;
-
-def : Proc<"v400", []>;
-def : Proc<"v500", []>;
-def : Proc<"v600", []>;
-def : Proc<"v700", []>;
-def : Proc<"v710", []>;
+def : Processor<"mblaze",  MBlazeGenericItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Descriptions
diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
index a4b21af..08f14c3 100644
--- a/lib/Target/MBlaze/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -150,14 +150,13 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
 
 TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
                                             const std::string &TT) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin())
     assert(0 && "Mac not supported on MBlaze");
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
+
+  if (TheTriple.isOSWindows())
     assert(0 && "Windows not supported on MBlaze");
-  default:
-    return new ELFMBlazeAsmBackend(T, Triple(TT).getOS());
-  }
+
+  return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
 }
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0016df5..0f0f60e 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -319,10 +319,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
 }
 
 static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+                                                TargetMachine &TM,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new MBlazeInstPrinter(MAI);
+    return new MBlazeInstPrinter(TM, MAI);
   return 0;
 }
 
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 4399ee2..973e968 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -77,7 +77,7 @@ static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
 
         // We must assume that unknown immediate values require more than
         // 16-bits to represent.
-        if (mop.isGlobal() || mop.isSymbol())
+        if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
           return true;
 
         // FIXME: we could probably check to see if the FP value happens
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index f39826b..21a5988 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -274,7 +274,7 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
   F->insert(It, loop);
   F->insert(It, finish);
 
-  // Update machine-CFG edges by transfering adding all successors and
+  // Update machine-CFG edges by transferring adding all successors and
   // remaining instructions from the current block to the new block which
   // will contain the Phi node for the select.
   finish->splice(finish->begin(), MBB,
@@ -456,7 +456,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
   F->insert(It, start);
   F->insert(It, exit);
 
-  // Update machine-CFG edges by transfering adding all successors and
+  // Update machine-CFG edges by transferring adding all successors and
   // remaining instructions from the current block to the new block which
   // will contain the Phi node for the select.
   exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
@@ -778,7 +778,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag in necessary since all emitted instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -1103,7 +1103,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
   switch (*constraint) {
   default:
     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
-    break;
+    break;
   case 'd':
   case 'y':
     if (type->isIntegerTy())
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index 094de5c..4acdcfd 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -21,22 +21,22 @@
 class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
              TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
+                [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
 
 class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
               TB<op, (outs GPR:$dst), (ins memri:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
+                 [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
 
 class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
               TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>;
+                 [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
 
 class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
                TB<op, (outs), (ins GPR:$dst, memrr:$addr),
                   !strconcat(instr_asm, "   $dst, $addr"),
-                  [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>;
+                  [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
 
 class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
              InstrItinClass itin> :
@@ -56,15 +56,10 @@ class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
                  !strconcat(instr_asm, "   $dst, $c, $b"),
                  [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
-class LogicF<bits<6> op, string instr_asm> :
-             TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
-                !strconcat(instr_asm, "   $dst, $b, $c"),
-                [], IIAlu>;
-
 class LogicFI<bits<6> op, string instr_asm> :
              TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                [], IIAlu>;
+                [], IIC_ALU>;
 
 let rb=0 in {
   class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
@@ -95,10 +90,10 @@ let rb=0 in {
 //===----------------------------------------------------------------------===//
 let Predicates=[HasFPU] in {
   def FORI   : LogicFI<0x28, "ori    ">;
-  def FADD   :  ArithF<0x16, 0x000, "fadd   ", fadd, IIAlu>;
-  def FRSUB  : ArithFR<0x16, 0x080, "frsub  ", fsub, IIAlu>;
-  def FMUL   :  ArithF<0x16, 0x100, "fmul   ", fmul, IIAlu>;
-  def FDIV   :  ArithF<0x16, 0x180, "fdiv   ", fdiv, IIAlu>;
+  def FADD   :  ArithF<0x16, 0x000, "fadd   ", fadd, IIC_FPU>;
+  def FRSUB  : ArithFR<0x16, 0x080, "frsub  ", fsub, IIC_FPU>;
+  def FMUL   :  ArithF<0x16, 0x100, "fmul   ", fmul, IIC_FPU>;
+  def FDIV   :  ArithF<0x16, 0x180, "fdiv   ", fdiv, IIC_FPUd>;
 }
 
 let Predicates=[HasFPU], isCodeGenOnly=1 in {
@@ -110,19 +105,19 @@ let Predicates=[HasFPU], isCodeGenOnly=1 in {
 }
 
 let Predicates=[HasFPU,HasSqrt] in {
-  def FLT    : ArithIF<0x16, 0x280, "flt    ", IIAlu>;
-  def FINT   : ArithFI<0x16, 0x300, "fint   ", IIAlu>;
-  def FSQRT  : ArithF2<0x16, 0x380, "fsqrt  ", IIAlu>;
+  def FLT    : ArithIF<0x16, 0x280, "flt    ", IIC_FPUf>;
+  def FINT   : ArithFI<0x16, 0x300, "fint   ", IIC_FPUi>;
+  def FSQRT  : ArithF2<0x16, 0x380, "fsqrt  ", IIC_FPUs>;
 }
 
 let isAsCheapAsAMove = 1 in {
-  def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>;
-  def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>;
-  def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>;
-  def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>;
-  def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>;
-  def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>;
-  def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>;
+  def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+  def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+  def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+  def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+  def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+  def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+  def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
 }
 
 
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index 3209845..3082a7e 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -13,7 +13,7 @@
 class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
              MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
                         !strconcat(instr_asm, " $dst, $b"),
-                        [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu>
+                        [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
 {
     bits<5> rd;
     bits<4> fslno;
@@ -29,7 +29,7 @@ class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
 class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
               MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
                          !strconcat(instr_asm, " $dst, $b"),
-                         [(set GPR:$dst, (OpNode GPR:$b))], IIAlu>
+                         [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
 {
     bits<5> rd;
     bits<5> rb;
@@ -45,7 +45,7 @@ class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
 class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
              MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
                         !strconcat(instr_asm, " $v, $b"),
-                        [(OpNode GPR:$v, immZExt4:$b)], IIAlu>
+                        [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
 {
     bits<5> ra;
     bits<4> fslno;
@@ -61,7 +61,7 @@ class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
 class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
               MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
                          !strconcat(instr_asm, " $v, $b"),
-                         [(OpNode GPR:$v, GPR:$b)], IIAlu>
+                         [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
 {
     bits<5> ra;
     bits<5> rb;
@@ -77,7 +77,7 @@ class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
 class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
               MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
                          !strconcat(instr_asm, " $b"),
-                         [(OpNode immZExt4:$b)], IIAlu>
+                         [(OpNode immZExt4:$b)], IIC_FSLp>
 {
     bits<4> fslno;
 
@@ -92,7 +92,7 @@ class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
 class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
                MBlazeInst<op, FCR, (outs), (ins GPR:$b),
                           !strconcat(instr_asm, " $b"),
-                          [(OpNode GPR:$b)], IIAlu>
+                          [(OpNode GPR:$b)], IIC_FSLp>
 {
     bits<5> rb;
 
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index d62574d..54f605f 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -81,7 +81,7 @@ class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
 // Pseudo instruction class
 //===----------------------------------------------------------------------===//
 class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
-      MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>;
+      MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
 
 //===----------------------------------------------------------------------===//
 // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b353dcd..794ebed 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -17,6 +17,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "MBlazeGenInstrInfo.inc"
 
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index b7300c1..b717da8 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -261,7 +261,6 @@ public:
   virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
     const;
 
-
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 7b8f70a..896e8ea 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -47,22 +47,22 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
 //===----------------------------------------------------------------------===//
 // MBlaze Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
-def HasPipe3     : Predicate<"Subtarget.hasPipe3()">;
+// def HasPipe3     : Predicate<"Subtarget.hasPipe3()">;
 def HasBarrel    : Predicate<"Subtarget.hasBarrel()">;
-def NoBarrel     : Predicate<"!Subtarget.hasBarrel()">;
+// def NoBarrel     : Predicate<"!Subtarget.hasBarrel()">;
 def HasDiv       : Predicate<"Subtarget.hasDiv()">;
 def HasMul       : Predicate<"Subtarget.hasMul()">;
-def HasFSL       : Predicate<"Subtarget.hasFSL()">;
-def HasEFSL      : Predicate<"Subtarget.hasEFSL()">;
-def HasMSRSet    : Predicate<"Subtarget.hasMSRSet()">;
-def HasException : Predicate<"Subtarget.hasException()">;
+// def HasFSL       : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL      : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet    : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
 def HasPatCmp    : Predicate<"Subtarget.hasPatCmp()">;
 def HasFPU       : Predicate<"Subtarget.hasFPU()">;
-def HasESR       : Predicate<"Subtarget.hasESR()">;
-def HasPVR       : Predicate<"Subtarget.hasPVR()">;
+// def HasESR       : Predicate<"Subtarget.hasESR()">;
+// def HasPVR       : Predicate<"Subtarget.hasPVR()">;
 def HasMul64     : Predicate<"Subtarget.hasMul64()">;
 def HasSqrt      : Predicate<"Subtarget.hasSqrt()">;
-def HasMMU       : Predicate<"Subtarget.hasMMU()">;
+// def HasMMU       : Predicate<"Subtarget.hasMMU()">;
 
 //===----------------------------------------------------------------------===//
 // MBlaze Operand, Complex Patterns and Transformations Definitions.
@@ -170,18 +170,18 @@ class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
              TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+                [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
 
 class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
                TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
                   !strconcat(instr_asm, "   $dst, $b, $c"),
-                  [], IIAlu>;
+                  [], IIC_ALU>;
 
 class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
              SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
                  !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+                 [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
 
 class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
             InstrItinClass itin> :
@@ -193,7 +193,7 @@ class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
              TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
                  !strconcat(instr_asm, "   $dst, $c, $b"),
-                 [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>;
+                 [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
 
 class ArithN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
@@ -204,7 +204,7 @@ class ArithN<bits<6> op, bits<11> flags, string instr_asm,
 class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
              TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                [], IIAlu>;
+                [], IIC_ALU>;
 
 class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
@@ -215,7 +215,7 @@ class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
 class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
              TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
                  !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [], IIAlu>;
+                 [], IIC_ALU>;
 
 //===----------------------------------------------------------------------===//
 // Misc Arithmetic Instructions
@@ -224,23 +224,23 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
 class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
             TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
-               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>;
+               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
 
 class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
              TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
                 [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
-                IIAlu>;
+                IIC_ALU>;
 
 class LogicI32<bits<6> op, string instr_asm> :
                TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
                   !strconcat(instr_asm, "   $dst, $b, $c"),
-                  [], IIAlu>;
+                  [], IIC_ALU>;
 
 class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
              TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [], IIAlu>;
+                 [], IIC_ALU>;
 
 //===----------------------------------------------------------------------===//
 // Memory Access Instructions
@@ -248,22 +248,22 @@ class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
 class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
             TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
                !strconcat(instr_asm, "   $dst, $addr"),
-               [], IILoad>;
+               [], IIC_MEMl>;
 
 class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
              TB<op, (outs GPR:$dst), (ins memri:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
+                [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
 
 class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
              TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [], IIStore>;
+                [], IIC_MEMs>;
 
 class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
               TB<op, (outs), (ins GPR:$dst, memri:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>;
+                 [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
 
 //===----------------------------------------------------------------------===//
 // Branch Instructions
@@ -271,7 +271,7 @@ class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
 class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
              TA<op, flags, (outs), (ins GPR:$target),
                 !strconcat(instr_asm, "   $target"),
-                [], IIBranch> {
+                [], IIC_BR> {
   let rd = 0x0;
   let ra = br;
   let Form = FCCR;
@@ -280,7 +280,7 @@ class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
 class BranchI<bits<6> op, bits<5> br, string instr_asm> :
               TB<op, (outs), (ins brtarget:$target),
                  !strconcat(instr_asm, "   $target"),
-                 [], IIBranch> {
+                 [], IIC_BR> {
   let rd = 0;
   let ra = br;
   let Form = FCCI;
@@ -292,7 +292,7 @@ class BranchI<bits<6> op, bits<5> br, string instr_asm> :
 class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
               TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
                  !strconcat(instr_asm, "   $link, $target"),
-                 [], IIBranch> {
+                 [], IIC_BRl> {
   let ra = br;
   let Form = FRCR;
 }
@@ -300,7 +300,7 @@ class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
 class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
                TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
                   !strconcat(instr_asm, "   $link, $target"),
-                  [], IIBranch> {
+                  [], IIC_BRl> {
   let ra = br;
   let Form = FRCI;
 }
@@ -312,7 +312,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
               TA<op, flags, (outs),
                  (ins GPR:$a, GPR:$b),
                  !strconcat(instr_asm, "   $a, $b"),
-                 [], IIBranch> {
+                 [], IIC_BRc> {
   let rd = br;
   let Form = FCRR;
 }
@@ -320,7 +320,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
 class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
                TB<op, (outs), (ins GPR:$a, brtarget:$offset),
                   !strconcat(instr_asm, "   $a, $offset"),
-                  [], IIBranch> {
+                  [], IIC_BRc> {
   let rd = br;
   let Form = FCRI;
 }
@@ -330,71 +330,74 @@ class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
 //===----------------------------------------------------------------------===//
 
 let isCommutable = 1, isAsCheapAsAMove = 1 in {
-  def ADDK   :  Arith<0x04, 0x000, "addk   ", add,  IIAlu>;
+  def ADDK   :  Arith<0x04, 0x000, "addk   ", add,  IIC_ALU>;
   def AND    :  Logic<0x21, 0x000, "and    ", and>;
   def OR     :  Logic<0x20, 0x000, "or     ", or>;
   def XOR    :  Logic<0x22, 0x000, "xor    ", xor>;
-  def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
-  def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
-  def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+
+  let Predicates=[HasPatCmp] in {
+    def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+    def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+    def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+  }
 
   let Defs = [CARRY] in {
-    def ADD    :  Arith<0x00, 0x000, "add    ", addc, IIAlu>;
+    def ADD    :  Arith<0x00, 0x000, "add    ", addc, IIC_ALU>;
 
     let Uses = [CARRY] in {
-      def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIAlu>;
+      def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIC_ALU>;
     }
   }
 
   let Uses = [CARRY] in {
-    def ADDKC  : ArithN<0x06, 0x000, "addkc  ", IIAlu>;
+    def ADDKC  : ArithN<0x06, 0x000, "addkc  ", IIC_ALU>;
   }
 }
 
 let isAsCheapAsAMove = 1 in {
-  def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIAlu>;
-  def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIAlu>;
-  def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIAlu>;
-  def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", sub,  IIAlu>;
+  def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIC_ALU>;
+  def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIC_ALU>;
+  def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIC_ALU>;
+  def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", sub,  IIC_ALU>;
 
   let Defs = [CARRY] in {
-    def RSUB   :  ArithR<0x01, 0x000, "rsub   ", subc, IIAlu>;
+    def RSUB   :  ArithR<0x01, 0x000, "rsub   ", subc, IIC_ALU>;
 
     let Uses = [CARRY] in {
-      def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIAlu>;
+      def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIC_ALU>;
     }
   }
 
   let Uses = [CARRY] in {
-    def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+    def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
   }
 }
 
 let isCommutable = 1, Predicates=[HasMul] in {
-  def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIAlu>;
+  def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIC_ALUm>;
 }
 
 let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
-  def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIAlu>;
-  def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIAlu>;
+  def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIC_ALUm>;
+  def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIC_ALUm>;
 }
 
 let Predicates=[HasMul,HasMul64] in {
-  def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+  def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
 }
 
 let Predicates=[HasBarrel] in {
-  def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIAlu>;
-  def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIAlu>;
-  def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIAlu>;
+  def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIC_SHT>;
+  def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIC_SHT>;
+  def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIC_SHT>;
   def BSRLI  :  ShiftI<0x19, 0x0, "bsrli  ", srl, uimm5, immZExt5>;
   def BSRAI  :  ShiftI<0x19, 0x1, "bsrai  ", sra, uimm5, immZExt5>;
   def BSLLI  :  ShiftI<0x19, 0x2, "bslli  ", shl, uimm5, immZExt5>;
 }
 
 let Predicates=[HasDiv] in {
-  def IDIV   :  ArithR<0x12, 0x000, "idiv   ", sdiv, IIAlu>;
-  def IDIVU  :  ArithR<0x12, 0x002, "idivu  ", udiv, IIAlu>;
+  def IDIV   :  ArithR<0x12, 0x000, "idiv   ", sdiv, IIC_ALUd>;
+  def IDIVU  :  ArithR<0x12, 0x002, "idivu  ", udiv, IIC_ALUd>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -552,7 +555,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
   def RTSD   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
                   "rtsd      $target, $imm",
                   [],
-                  IIBranch>;
+                  IIC_BR>;
 }
 
 let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -560,7 +563,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
   def RTID   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
                   "rtid      $target, $imm",
                   [],
-                  IIBranch>;
+                  IIC_BR>;
 }
 
 let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -568,7 +571,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
   def RTBD   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
                   "rtbd      $target, $imm",
                   [],
-                  IIBranch>;
+                  IIC_BR>;
 }
 
 let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -576,7 +579,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
   def RTED   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
                   "rted      $target, $imm",
                   [],
-                  IIBranch>;
+                  IIC_BR>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -584,7 +587,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
 //===----------------------------------------------------------------------===//
 
 let neverHasSideEffects = 1 in {
-  def NOP :  MBlazeInst< 0x20, FC, (outs), (ins), "nop    ", [], IIAlu>;
+  def NOP :  MBlazeInst< 0x20, FC, (outs), (ins), "nop    ", [], IIC_ALU>;
 }
 
 let usesCustomInserter = 1 in {
@@ -611,17 +614,17 @@ let usesCustomInserter = 1 in {
 
 let rb = 0 in {
   def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
-                  "sext16    $dst, $src", [], IIAlu>;
+                  "sext16    $dst, $src", [], IIC_ALU>;
   def SEXT8  : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
-                  "sext8     $dst, $src", [], IIAlu>;
+                  "sext8     $dst, $src", [], IIC_ALU>;
   let Defs = [CARRY] in {
     def SRL    : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
-                    "srl       $dst, $src", [], IIAlu>;
+                    "srl       $dst, $src", [], IIC_ALU>;
     def SRA    : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
-                    "sra       $dst, $src", [], IIAlu>;
+                    "sra       $dst, $src", [], IIC_ALU>;
     let Uses = [CARRY] in {
       def SRC    : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
-                      "src       $dst, $src", [], IIAlu>;
+                      "src       $dst, $src", [], IIC_ALU>;
     }
   }
 }
@@ -637,36 +640,36 @@ let isCodeGenOnly=1 in {
 //===----------------------------------------------------------------------===//
 let Form=FRCS in {
   def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
-                "mfs       $dst, $src", [], IIAlu>;
+                "mfs       $dst, $src", [], IIC_ALU>;
 }
 
 let Form=FCRCS in {
   def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
-                "mts       $dst, $src", [], IIAlu>;
+                "mts       $dst, $src", [], IIC_ALU>;
 }
 
 def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
-                 "msrset    $dst, $set", [], IIAlu>;
+                 "msrset    $dst, $set", [], IIC_ALU>;
 
 def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
-                 "msrclr    $dst, $clr", [], IIAlu>;
+                 "msrclr    $dst, $clr", [], IIC_ALU>;
 
 let rd=0x0, Form=FCRR in {
   def WDC  : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
-                "wdc       $a, $b", [], IIAlu>;
+                "wdc       $a, $b", [], IIC_WDC>;
   def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
-                "wdc.flush $a, $b", [], IIAlu>;
+                "wdc.flush $a, $b", [], IIC_WDC>;
   def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
-                "wdc.clear $a, $b", [], IIAlu>;
+                "wdc.clear $a, $b", [], IIC_WDC>;
   def WIC  : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
-                "wic       $a, $b", [], IIAlu>;
+                "wic       $a, $b", [], IIC_WDC>;
 }
 
 def BRK  :  BranchL<0x26, 0x0C, 0x000, "brk    ">;
 def BRKI : BranchLI<0x2E, 0x0C, "brki   ">;
 
 def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
-                     "imm       $imm", [], IIAlu>;
+                     "imm       $imm", [], IIC_ALU>;
 
 //===----------------------------------------------------------------------===//
 // Pseudo instructions for atomic operations
@@ -848,11 +851,6 @@ def : Pat<(MBWrapper tconstpool:$in),  (ORI (i32 R0), tconstpool:$in)>;
 // Misc instructions
 def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
 
-// Arithmetic with immediates
-def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>;
-def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>;
-def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>;
-
 // Convert any extend loads into zero extend loads
 def : Pat<(extloadi8  iaddr:$src), (i32 (LBUI iaddr:$src))>;
 def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index fa9140d..ed8511d 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -181,6 +181,26 @@ unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
   return 0; // Not reached
 }
 
+bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
+  return Reg <= 31;
+}
+
+bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
+  switch (Reg) {
+    case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : 
+    case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : 
+    case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : 
+    case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : 
+    case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : 
+    case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : 
+      return true;
+
+    default:
+      return false;
+  }
+  return false; // Not reached
+}
+
 unsigned MBlazeRegisterInfo::getPICCallReg() {
   return MBlaze::R20;
 }
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 839536d..69ec5aa 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -45,6 +45,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
   static unsigned getRegisterNumbering(unsigned RegEnum);
   static unsigned getRegisterFromNumbering(unsigned RegEnum);
   static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
+  static bool isRegister(unsigned RegEnum);
+  static bool isSpecialRegister(unsigned RegEnum);
 
   /// Get PIC indirect call register
   static unsigned getPICCallReg();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index fbefb22..1a695a7 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -85,18 +85,19 @@ let Namespace = "MBlaze" in {
   def RTLBX  : MBlazeSPRReg<0x1002, "rtlbx">,  DwarfRegNum<[41]>;
   def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
   def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
-  def RPVR0  : MBlazeSPRReg<0x2000, "rpvr0">,  DwarfRegNum<[44]>;
-  def RPVR1  : MBlazeSPRReg<0x2001, "rpvr1">,  DwarfRegNum<[45]>;
-  def RPVR2  : MBlazeSPRReg<0x2002, "rpvr2">,  DwarfRegNum<[46]>;
-  def RPVR3  : MBlazeSPRReg<0x2003, "rpvr3">,  DwarfRegNum<[47]>;
-  def RPVR4  : MBlazeSPRReg<0x2004, "rpvr4">,  DwarfRegNum<[48]>;
-  def RPVR5  : MBlazeSPRReg<0x2005, "rpvr5">,  DwarfRegNum<[49]>;
-  def RPVR6  : MBlazeSPRReg<0x2006, "rpvr6">,  DwarfRegNum<[50]>;
-  def RPVR7  : MBlazeSPRReg<0x2007, "rpvr7">,  DwarfRegNum<[51]>;
-  def RPVR8  : MBlazeSPRReg<0x2008, "rpvr8">,  DwarfRegNum<[52]>;
-  def RPVR9  : MBlazeSPRReg<0x2009, "rpvr9">,  DwarfRegNum<[53]>;
-  def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>;
-  def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>;
+  def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+  def RPVR0  : MBlazeSPRReg<0x2000, "rpvr0">,  DwarfRegNum<[45]>;
+  def RPVR1  : MBlazeSPRReg<0x2001, "rpvr1">,  DwarfRegNum<[46]>;
+  def RPVR2  : MBlazeSPRReg<0x2002, "rpvr2">,  DwarfRegNum<[47]>;
+  def RPVR3  : MBlazeSPRReg<0x2003, "rpvr3">,  DwarfRegNum<[48]>;
+  def RPVR4  : MBlazeSPRReg<0x2004, "rpvr4">,  DwarfRegNum<[49]>;
+  def RPVR5  : MBlazeSPRReg<0x2005, "rpvr5">,  DwarfRegNum<[50]>;
+  def RPVR6  : MBlazeSPRReg<0x2006, "rpvr6">,  DwarfRegNum<[51]>;
+  def RPVR7  : MBlazeSPRReg<0x2007, "rpvr7">,  DwarfRegNum<[52]>;
+  def RPVR8  : MBlazeSPRReg<0x2008, "rpvr8">,  DwarfRegNum<[53]>;
+  def RPVR9  : MBlazeSPRReg<0x2009, "rpvr9">,  DwarfRegNum<[54]>;
+  def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+  def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
 
   // The carry bit. In the Microblaze this is really bit 29 of the
   // MSR register but this is the only bit of that register that we
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index ac4d98c..4662f25 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -8,57 +8,48 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files.
+// MBlaze functional units.
 //===----------------------------------------------------------------------===//
-def ALU     : FuncUnit;
-def IMULDIV : FuncUnit;
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
 
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for MBlaze
 //===----------------------------------------------------------------------===//
-def IIAlu              : InstrItinClass;
-def IILoad             : InstrItinClass;
-def IIStore            : InstrItinClass;
-def IIXfer             : InstrItinClass;
-def IIBranch           : InstrItinClass;
-def IIHiLo             : InstrItinClass;
-def IIImul             : InstrItinClass;
-def IIIdiv             : InstrItinClass;
-def IIFcvt             : InstrItinClass;
-def IIFmove            : InstrItinClass;
-def IIFcmp             : InstrItinClass;
-def IIFadd             : InstrItinClass;
-def IIFmulSingle       : InstrItinClass;
-def IIFmulDouble       : InstrItinClass;
-def IIFdivSingle       : InstrItinClass;
-def IIFdivDouble       : InstrItinClass;
-def IIFsqrtSingle      : InstrItinClass;
-def IIFsqrtDouble      : InstrItinClass;
-def IIFrecipFsqrtStep  : InstrItinClass;
-def IIPseudo           : InstrItinClass;
+def IIC_ALU    : InstrItinClass;
+def IIC_ALUm   : InstrItinClass;
+def IIC_ALUd   : InstrItinClass;
+def IIC_SHT    : InstrItinClass;
+def IIC_FSLg   : InstrItinClass;
+def IIC_FSLp   : InstrItinClass;
+def IIC_MEMs   : InstrItinClass;
+def IIC_MEMl   : InstrItinClass;
+def IIC_FPU    : InstrItinClass;
+def IIC_FPUd   : InstrItinClass;
+def IIC_FPUf   : InstrItinClass;
+def IIC_FPUi   : InstrItinClass;
+def IIC_FPUs   : InstrItinClass;
+def IIC_FPUc   : InstrItinClass;
+def IIC_BR     : InstrItinClass;
+def IIC_BRc    : InstrItinClass;
+def IIC_BRl    : InstrItinClass;
+def IIC_WDC    : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
-// MBlaze Generic instruction itineraries.
+// MBlaze generic instruction itineraries.
 //===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<
-  [ALU, IMULDIV], [], [
-  InstrItinData<IIAlu              , [InstrStage<1,  [ALU]>]>,
-  InstrItinData<IILoad             , [InstrStage<3,  [ALU]>]>,
-  InstrItinData<IIStore            , [InstrStage<1,  [ALU]>]>,
-  InstrItinData<IIXfer             , [InstrStage<2,  [ALU]>]>,
-  InstrItinData<IIBranch           , [InstrStage<1,  [ALU]>]>,
-  InstrItinData<IIHiLo             , [InstrStage<1,  [IMULDIV]>]>,
-  InstrItinData<IIImul             , [InstrStage<17, [IMULDIV]>]>,
-  InstrItinData<IIIdiv             , [InstrStage<38, [IMULDIV]>]>,
-  InstrItinData<IIFcvt             , [InstrStage<1,  [ALU]>]>,
-  InstrItinData<IIFmove            , [InstrStage<2,  [ALU]>]>,
-  InstrItinData<IIFcmp             , [InstrStage<3,  [ALU]>]>,
-  InstrItinData<IIFadd             , [InstrStage<4,  [ALU]>]>,
-  InstrItinData<IIFmulSingle       , [InstrStage<7,  [ALU]>]>,
-  InstrItinData<IIFmulDouble       , [InstrStage<8,  [ALU]>]>,
-  InstrItinData<IIFdivSingle       , [InstrStage<23, [ALU]>]>,
-  InstrItinData<IIFdivDouble       , [InstrStage<36, [ALU]>]>,
-  InstrItinData<IIFsqrtSingle      , [InstrStage<54, [ALU]>]>,
-  InstrItinData<IIFsqrtDouble      , [InstrStage<12, [ALU]>]>,
-  InstrItinData<IIFrecipFsqrtStep  , [InstrStage<5,  [ALU]>]>
-]>;
+def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 0000000..ccbf99d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+  [IF,ID,EX], [], [
+
+  // ALU instruction with one destination register and either two register
+  // source operands or one register source operand and one immediate operand.
+  // The instruction takes one cycle to execute in each of the stages. The
+  // two source operands are read during the decode stage and the result is
+  // ready after the execute stage.
+  InstrItinData< IIC_ALU,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>], // one cycle in execute stage
+               [ 2                    // result ready after two cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // ALU multiply instruction with one destination register and either two
+  // register source operands or one register source operand and one immediate
+  // operand.  The instruction takes one cycle to execute in each of the
+  // pipeline stages except the execute stage, which takes three cycles. The
+  // two source operands are read during the decode stage and the result is
+  // ready after the execute stage.
+  InstrItinData< IIC_ALUm,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<3,[EX]>], // three cycles in execute stage
+               [ 4                    // result ready after four cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // ALU divide instruction with one destination register two register source
+  // operands. The instruction takes one cycle to execute in each the pipeline
+  // stages except the execute stage, which takes 34 cycles. The two
+  // source operands are read during the decode stage and the result is ready
+  // after the execute stage.
+  InstrItinData< IIC_ALUd,
+               [ InstrStage<1,[IF]>    // one cycle in fetch stage
+               , InstrStage<1,[ID]>    // one cycle in decode stage
+               , InstrStage<34,[EX]>], // 34 cycles in execute stage
+               [ 35                    // result ready after 35 cycles
+               , 1                     // first operand read after one cycle
+               , 1 ]>,                 // second operand read after one cycle
+
+  // Shift instruction with one destination register and either two register
+  // source operands or one register source operand and one immediate operand.
+  // The instruction takes one cycle to execute in each of the pipeline stages
+  // except the execute stage, which takes two cycles.  The two source operands
+  // are read during the decode stage and the result is ready after the execute
+  // stage.
+  InstrItinData< IIC_SHT,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 3                    // result ready after three cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Branch instruction with one source operand register. The instruction takes
+  // one cycle to execute in each of the pipeline stages. The source operand is
+  // read during the decode stage.
+  InstrItinData< IIC_BR,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>], // one cycle in execute stage
+               [ 1 ]>,                // first operand read after one cycle
+
+  // Conditional branch instruction with two source operand registers. The
+  // instruction takes one cycle to execute in each of the pipeline stages. The
+  // two source operands are read during the decode stage.
+  InstrItinData< IIC_BRc,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>], // one cycle in execute stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Branch and link instruction with one destination register and one source
+  // operand register. The instruction takes one cycle to execute in each of
+  // the pipeline stages. The source operand is read during the decode stage
+  // and the destination register is ready after the execute stage.
+  InstrItinData< IIC_BRl,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>], // one cycle in execute stage
+               [ 2                    // result ready after two cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Cache control instruction with two source operand registers. The
+  // instruction takes one cycle to execute in each of the pipeline stages
+  // except the memory access stage, which takes two cycles. The source
+  // operands are read during the decode stage.
+  InstrItinData< IIC_WDC,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Floating point instruction with one destination register and two source
+  // operand registers. The instruction takes one cycle to execute in each of
+  // the pipeline stages except the execute stage, which takes six cycles. The
+  // source operands are read during the decode stage and the results are ready
+  // after the execute stage.
+  InstrItinData< IIC_FPU,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<6,[EX]>], // six cycles in execute stage
+               [ 7                    // result ready after seven cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Floating point divide instruction with one destination register and two
+  // source operand registers. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the execute stage, which takes 30
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the execute stage.
+  InstrItinData< IIC_FPUd,
+               [ InstrStage<1,[IF]>    // one cycle in fetch stage
+               , InstrStage<1,[ID]>    // one cycle in decode stage
+               , InstrStage<30,[EX]>], // one cycle in execute stage
+               [ 31                    // result ready after 31 cycles
+               , 1                     // first operand read after one cycle
+               , 1 ]>,                 // second operand read after one cycle
+
+  // Convert floating point to integer instruction with one destination
+  // register and one source operand register. The instruction takes one cycle
+  // to execute in each of the pipeline stages except the execute stage,
+  // which takes seven cycles. The source operands are read during the decode
+  // stage and the results are ready after the execute stage.
+  InstrItinData< IIC_FPUi,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<7,[EX]>], // seven cycles in execute stage
+               [ 8                    // result ready after eight cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Convert integer to floating point instruction with one destination
+  // register and one source operand register. The instruction takes one cycle
+  // to execute in each of the pipeline stages except the execute stage,
+  // which takes six cycles. The source operands are read during the decode
+  // stage and the results are ready after the execute stage.
+  InstrItinData< IIC_FPUf,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<6,[EX]>], // six cycles in execute stage
+               [ 7                    // result ready after seven cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Floating point square root instruction with one destination register and
+  // one source operand register. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the execute stage, which takes 29
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the execute stage.
+  InstrItinData< IIC_FPUs,
+               [ InstrStage<1,[IF]>    // one cycle in fetch stage
+               , InstrStage<1,[ID]>    // one cycle in decode stage
+               , InstrStage<29,[EX]>], // 29 cycles in execute stage
+               [ 30                    // result ready after 30 cycles
+               , 1 ]>,                 // first operand read after one cycle
+
+  // Floating point comparison instruction with one destination register and
+  // two source operand registers. The instruction takes one cycle to execute
+  // in each of the pipeline stages except the execute stage, which takes three
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the execute stage.
+  InstrItinData< IIC_FPUc,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<3,[EX]>], // three cycles in execute stage
+               [ 4                    // result ready after four cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // FSL get instruction with one register or immediate source operand and one
+  // destination register. The instruction takes one cycle to execute in each
+  // of the pipeline stages except the execute stage, which takes two cycles.
+  // The one source operand is read during the decode stage and the result is
+  // ready after the execute stage.
+  InstrItinData< IIC_FSLg,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 3                    // result ready after two cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // FSL put instruction with either two register source operands or one
+  // register source operand and one immediate operand. There is no result
+  // produced by the instruction. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the execute stage, which takes two
+  // cycles. The two source operands are read during the decode stage.
+  InstrItinData< IIC_FSLp,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Memory store instruction with either three register source operands or two
+  // register source operands and one immediate operand. There is no result
+  // produced by the instruction. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the execute stage, which takes two
+  // cycles. All of the source operands are read during the decode stage.
+  InstrItinData< IIC_MEMs,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 1                    // first operand read after one cycle
+               , 1                    // second operand read after one cycle
+               , 1 ]>,                // third operand read after one cycle
+
+  // Memory load instruction with one destination register and either two
+  // register source operands or one register source operand and one immediate
+  // operand. The instruction takes one cycle to execute in each of the
+  // pipeline stages except the execute stage, which takes two cycles. All of
+  // the source operands are read during the decode stage and the result is
+  // ready after the execute stage.
+  InstrItinData< IIC_MEMl,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<2,[EX]>], // two cycles in execute stage
+               [ 3                    // result ready after four cycles
+               , 1                    // second operand read after one cycle
+               , 1 ]>                 // third operand read after one cycle
+]>;
diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 0000000..fa88766
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+  [IF,ID,EX,MA,WB], [], [
+
+  // ALU instruction with one destination register and either two register
+  // source operands or one register source operand and one immediate operand.
+  // The instruction takes one cycle to execute in each of the stages. The
+  // two source operands are read during the decode stage and the result is
+  // ready after the execute stage.
+  InstrItinData< IIC_ALU,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 2                    // result ready after two cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // ALU multiply instruction with one destination register and either two
+  // register source operands or one register source operand and one immediate
+  // operand.  The instruction takes one cycle to execute in each of the
+  // pipeline stages. The two source operands are read during the decode stage
+  // and the result is ready after the execute stage.
+  InstrItinData< IIC_ALUm,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 2                    // result ready after two cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // ALU divide instruction with one destination register two register source
+  // operands. The instruction takes one cycle to execute in each the pipeline
+  // stages except the memory access stage, which takes 31 cycles. The two
+  // source operands are read during the decode stage and the result is ready
+  // after the memory access stage.
+  InstrItinData< IIC_ALUd,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<31,[MA]>  // 31 cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 33                   // result ready after 33 cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Shift instruction with one destination register and either two register
+  // source operands or one register source operand and one immediate operand.
+  // The instruction takes one cycle to execute in each of the pipeline stages.
+  // The two source operands are read during the decode stage and the result is
+  // ready after the memory access stage.
+  InstrItinData< IIC_SHT,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 3                    // result ready after three cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Branch instruction with one source operand register. The instruction takes
+  // one cycle to execute in each of the pipeline stages. The source operand is
+  // read during the decode stage.
+  InstrItinData< IIC_BR,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 1 ]>,                // first operand read after one cycle
+
+  // Conditional branch instruction with two source operand registers. The
+  // instruction takes one cycle to execute in each of the pipeline stages. The
+  // two source operands are read during the decode stage.
+  InstrItinData< IIC_BRc,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Branch and link instruction with one destination register and one source
+  // operand register. The instruction takes one cycle to execute in each of
+  // the pipeline stages. The source operand is read during the decode stage
+  // and the destination register is ready after the writeback stage.
+  InstrItinData< IIC_BRl,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 4                    // result ready after four cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Cache control instruction with two source operand registers. The
+  // instruction takes one cycle to execute in each of the pipeline stages
+  // except the memory access stage, which takes two cycles. The source
+  // operands are read during the decode stage.
+  InstrItinData< IIC_WDC,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<2,[MA]>   // two cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Floating point instruction with one destination register and two source
+  // operand registers. The instruction takes one cycle to execute in each of
+  // the pipeline stages except the memory access stage, which takes two
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the writeback stage.
+  InstrItinData< IIC_FPU,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<2,[MA]>   // two cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 5                    // result ready after five cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Floating point divide instruction with one destination register and two
+  // source operand registers. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the memory access stage, which takes 26
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the writeback stage.
+  InstrItinData< IIC_FPUd,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<26,[MA]>  // 26 cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 29                   // result ready after 29 cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Convert floating point to integer instruction with one destination
+  // register and one source operand register. The instruction takes one cycle
+  // to execute in each of the pipeline stages except the memory access stage,
+  // which takes three cycles. The source operands are read during the decode
+  // stage and the results are ready after the writeback stage.
+  InstrItinData< IIC_FPUi,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<3,[MA]>   // three cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 6                   // result ready after six cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Convert integer to floating point instruction with one destination
+  // register and one source operand register. The instruction takes one cycle
+  // to execute in each of the pipeline stages except the memory access stage,
+  // which takes two cycles. The source operands are read during the decode
+  // stage and the results are ready after the writeback stage.
+  InstrItinData< IIC_FPUf,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<2,[MA]>   // two cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 5                    // result ready after five cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Floating point square root instruction with one destination register and
+  // one source operand register. The instruction takes one cycle to execute in
+  // each of the pipeline stages except the memory access stage, which takes 25
+  // cycles. The source operands are read during the decode stage and the
+  // results are ready after the writeback stage.
+  InstrItinData< IIC_FPUs,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<25,[MA]>  // 25 cycles in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 28                   // result ready after 28 cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // Floating point comparison instruction with one destination register and
+  // two source operand registers. The instruction takes one cycle to execute
+  // in each of the pipeline stages. The source operands are read during the
+  // decode stage and the results are ready after the execute stage.
+  InstrItinData< IIC_FPUc,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 2                    // result ready after two cycles
+               , 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // FSL get instruction with one register or immediate source operand and one
+  // destination register. The instruction takes one cycle to execute in each
+  // of the pipeline stages. The one source operand is read during the decode
+  // stage and the result is ready after the execute stage.
+  InstrItinData< IIC_FSLg,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 2                    // result ready after two cycles
+               , 1 ]>,                // first operand read after one cycle
+
+  // FSL put instruction with either two register source operands or one
+  // register source operand and one immediate operand. There is no result
+  // produced by the instruction. The instruction takes one cycle to execute in
+  // each of the pipeline stages. The two source operands are read during the
+  // decode stage.
+  InstrItinData< IIC_FSLp,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 1                    // first operand read after one cycle
+               , 1 ]>,                // second operand read after one cycle
+
+  // Memory store instruction with either three register source operands or two
+  // register source operands and one immediate operand. There is no result
+  // produced by the instruction. The instruction takes one cycle to execute in
+  // each of the pipeline stages. All of the source operands are read during
+  // the decode stage.
+  InstrItinData< IIC_MEMs,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 1                    // first operand read after one cycle
+               , 1                    // second operand read after one cycle
+               , 1 ]>,                // third operand read after one cycle
+
+  // Memory load instruction with one destination register and either two
+  // register source operands or one register source operand and one immediate
+  // operand. The instruction takes one cycle to execute in each of the
+  // pipeline stages. All of the source operands are read during the decode
+  // stage and the result is ready after the writeback stage.
+  InstrItinData< IIC_MEMl,
+               [ InstrStage<1,[IF]>   // one cycle in fetch stage
+               , InstrStage<1,[ID]>   // one cycle in decode stage
+               , InstrStage<1,[EX]>   // one cycle in execute stage
+               , InstrStage<1,[MA]>   // one cycle in memory access stage
+               , InstrStage<1,[WB]>], // one cycle in write back stage
+               [ 4                    // result ready after four cycles
+               , 1                    // second operand read after one cycle
+               , 1 ]>                 // third operand read after one cycle
+]>;
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index 3440521..a80744a 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -13,19 +13,39 @@
 
 #include "MBlazeSubtarget.h"
 #include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
 #include "MBlazeGenSubtarget.inc"
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
-  HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false),
-  HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false),
-  HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false),
-  HasMul64(false), HasSqrt(false), HasMMU(false)
+  HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+  HasFPU(false), HasMul64(false), HasSqrt(false)
 {
-  std::string CPU = "v400";
-  MBlazeArchVersion = V400;
-
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  std::string CPU = "mblaze";
+  CPU = ParseSubtargetFeatures(FS, CPU);
+
+  // Only use instruction scheduling if the selected CPU has an instruction
+  // itinerary (the default CPU is the only one that doesn't).
+  HasItin = CPU != "mblaze";
+  DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n");
+
+  // Compute the issue width of the MBlaze itineraries
+  computeIssueWidth();
+}
+
+void MBlazeSubtarget::computeIssueWidth() {
+  InstrItins.IssueWidth = 1;
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                      TargetSubtarget::AntiDepBreakMode& Mode,
+                      RegClassVector& CriticalPathRCs) const {
+  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+  return HasItin && OptLevel >= CodeGenOpt::Default;
 }
+
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index bebb3f7..2255b28 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -24,29 +24,14 @@ namespace llvm {
 class MBlazeSubtarget : public TargetSubtarget {
 
 protected:
-
-  enum MBlazeArchEnum {
-    V400, V500, V600, V700, V710
-  };
-
-  // MBlaze architecture version
-  MBlazeArchEnum MBlazeArchVersion;
-
-  bool HasPipe3;
   bool HasBarrel;
   bool HasDiv;
   bool HasMul;
-  bool HasFSL;
-  bool HasEFSL;
-  bool HasMSRSet;
-  bool HasException;
   bool HasPatCmp;
   bool HasFPU;
-  bool HasESR;
-  bool HasPVR;
   bool HasMul64;
   bool HasSqrt;
-  bool HasMMU;
+  bool HasItin;
 
   InstrItineraryData InstrItins;
 
@@ -61,18 +46,26 @@ public:
   std::string ParseSubtargetFeatures(const std::string &FS,
                                      const std::string &CPU);
 
+  /// Compute the number of maximum number of issues per cycle for the
+  /// MBlaze scheduling itineraries.
+  void computeIssueWidth();
+
+  /// enablePostRAScheduler - True at 'More' optimization.
+  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                             TargetSubtarget::AntiDepBreakMode& Mode,
+                             RegClassVector& CriticalPathRCs) const;
+
+  /// getInstrItins - Return the instruction itineraies based on subtarget.
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  bool hasItin()   const { return HasItin; }
+  bool hasPCMP()   const { return HasPatCmp; }
   bool hasFPU()    const { return HasFPU; }
   bool hasSqrt()   const { return HasSqrt; }
   bool hasMul()    const { return HasMul; }
   bool hasMul64()  const { return HasMul64; }
   bool hasDiv()    const { return HasDiv; }
   bool hasBarrel() const { return HasBarrel; }
-
-  bool isV400() const { return MBlazeArchVersion == V400; }
-  bool isV500() const { return MBlazeArchVersion == V500; }
-  bool isV600() const { return MBlazeArchVersion == V600; }
-  bool isV700() const { return MBlazeArchVersion == V700; }
-  bool isV710() const { return MBlazeArchVersion == V710; }
 };
 } // End llvm namespace
 
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index cd949e1..df34a83 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -36,19 +36,18 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     bool RelaxAll,
                                     bool NoExecStack) {
   Triple TheTriple(TT);
-  switch (TheTriple.getOS()) {
-  case Triple::Darwin:
+
+  if (TheTriple.isOSDarwin()) {
     llvm_unreachable("MBlaze does not support Darwin MACH-O format");
     return NULL;
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
+  }
+
+  if (TheTriple.isOSWindows()) {
     llvm_unreachable("MBlaze does not support Windows COFF format");
     return NULL;
-  default:
-    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll,
-                             NoExecStack);
   }
+
+  return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
 }
 
 
@@ -87,7 +86,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
   DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
   InstrInfo(*this),
   FrameLowering(Subtarget),
-  TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) {
+  TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+  InstrItins(Subtarget.getInstrItineraryData()) {
   if (getRelocationModel() == Reloc::Default) {
       setRelocationModel(Reloc::Static);
   }
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 45ad078..48ce37a 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -38,13 +38,18 @@ namespace llvm {
     MBlazeSelectionDAGInfo TSInfo;
     MBlazeIntrinsicInfo    IntrinsicInfo;
     MBlazeELFWriterInfo    ELFWriterInfo;
+    InstrItineraryData     InstrItins;
+
   public:
     MBlazeTargetMachine(const Target &T, const std::string &TT,
-                      const std::string &FS);
+                        const std::string &FS);
 
     virtual const MBlazeInstrInfo *getInstrInfo() const
     { return &InstrInfo; }
 
+    virtual const InstrItineraryData *getInstrItineraryData() const
+    {  return &InstrItins; }
+
     virtual const TargetFrameLowering *getFrameLowering() const
     { return &FrameLowering; }
 
diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO
index 2e613eb..317d7c0 100644
--- a/lib/Target/MBlaze/TODO
+++ b/lib/Target/MBlaze/TODO
@@ -9,8 +9,6 @@
   needs to be examined more closely:
     - The stack layout needs to be examined to make sure it meets
       the standard, especially in regards to var arg functions.
-    - The processor itineraries are copied from a different backend
-      and need to be updated to model the MicroBlaze correctly.
     - Look at the MBlazeGenFastISel.inc stuff and make use of it
       if appropriate.
 
@@ -18,9 +16,6 @@
   There are a few things that need to be looked at:
     - There are some instructions that are not generated by the backend
       and have not been tested as far as the parser is concerned.
-    - The assembly parser does not use any MicroBlaze specific directives.
+    - The assembly parser does not use many MicroBlaze specific directives.
       I should investigate if there are MicroBlaze specific directive and,
       if there are, add them.
-    - The instruction MFS and MTS use special names for some of the
-      special registers that can be accessed. These special register
-      names should be parsed by the assembly parser.
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index f0e1ce2..63860dc 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -18,11 +18,12 @@
 
 namespace llvm {
   class MCOperand;
+  class TargetMachine;
 
   class MSP430InstPrinter : public MCInstPrinter {
   public:
-    MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
-    }
+    MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+      : MCInstPrinter(MAI) {}
 
     virtual void printInst(const MCInst *MI, raw_ostream &O);
 
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index a1a7f44..5264d68 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -164,10 +164,11 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
 }
 
 static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+                                                TargetMachine &TM,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new MSP430InstPrinter(MAI);
+    return new MSP430InstPrinter(TM, MAI);
   return 0;
 }
 
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index a95d59c..006785b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -515,7 +515,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token chain and
   // flag operands which copy the outgoing args into registers.  The InFlag in
-  // necessary since all emited instructions must be stuck together.
+  // necessary since all emitted instructions must be stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 26df1a0..8939b0a 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -13,6 +13,7 @@ tablegen(MipsGenSubtarget.inc -gen-subtarget)
 add_llvm_target(MipsCodeGen
   MipsAsmPrinter.cpp
   MipsDelaySlotFiller.cpp
+  MipsExpandPseudo.cpp
   MipsInstrInfo.cpp
   MipsISelDAGToDAG.cpp
   MipsISelLowering.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index a9ab050..05b4c5a 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the entry points for global functions defined in 
+// This file contains the entry points for global functions defined in
 // the LLVM Mips back-end.
 //
 //===----------------------------------------------------------------------===//
@@ -25,6 +25,7 @@ namespace llvm {
 
   FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+  FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
 
   extern Target TheMipsTarget;
   extern Target TheMipselTarget;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 3e6437b..b79016d 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -59,7 +59,7 @@ def FeatureMips1       : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
 def FeatureMips2       : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
                                 "Mips2 ISA Support">;
 def FeatureMips32      : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
-                                "Mips32 ISA Support", 
+                                "Mips32 ISA Support",
                                 [FeatureCondMov, FeatureBitCount]>;
 def FeatureMips32r2    : SubtargetFeature<"mips32r2", "MipsArchVersion",
                                 "Mips32r2", "Mips32r2 ISA Support",
@@ -81,7 +81,7 @@ def : Proc<"r6000", [FeatureMips2]>;
 
 def : Proc<"4ke", [FeatureMips32r2]>;
 
-// Allegrex is a 32bit subset of r4000, both for interger and fp registers,
+// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
 // but much more similar to Mips2 than Mips3. It also contains some of
 // Mips32/Mips32r2 instructions and a custom vector fpu processor.
 def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index bd28a9b..502f744 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -30,7 +30,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h" 
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -53,14 +53,14 @@ namespace {
       return "Mips Assembly Printer";
     }
 
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                          unsigned AsmVariant, const char *ExtraCode,
                          raw_ostream &O);
     void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
     void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                          const char *Modifier = 0);
-    void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
+    void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                          const char *Modifier = 0);
     void printSavedRegsBitmask(raw_ostream &O);
     void printHex32(unsigned int Value, raw_ostream &O);
@@ -77,7 +77,8 @@ namespace {
     }
     virtual void EmitFunctionBodyStart();
     virtual void EmitFunctionBodyEnd();
-    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+                                                   MBB) const;
     static const char *getRegisterName(unsigned RegNo);
 
     virtual void EmitFunctionEntryLabel();
@@ -94,12 +95,12 @@ namespace {
 //  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
 //  Describe the stack frame.
 //
-//  -- Mask directives "(f)mask  bitmask, offset" 
+//  -- Mask directives "(f)mask  bitmask, offset"
 //  Tells the assembler which registers are saved and where.
-//  bitmask - contain a little endian bitset indicating which registers are 
-//            saved on function prologue (e.g. with a 0x80000000 mask, the 
+//  bitmask - contain a little endian bitset indicating which registers are
+//            saved on function prologue (e.g. with a 0x80000000 mask, the
 //            assembler knows the register 31 (RA) is saved at prologue.
-//  offset  - the position before stack pointer subtraction indicating where 
+//  offset  - the position before stack pointer subtraction indicating where
 //            the first saved register on prologue is located. (e.g. with a
 //
 //  Consider the following function prologue:
@@ -110,9 +111,9 @@ namespace {
 //       sw $ra, 40($sp)
 //       sw $fp, 36($sp)
 //
-//    With a 0xc0000000 mask, the assembler knows the register 31 (RA) and 
-//    30 (FP) are saved at prologue. As the save order on prologue is from 
-//    left to right, RA is saved first. A -8 offset means that after the 
+//    With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
+//    30 (FP) are saved at prologue. As the save order on prologue is from
+//    left to right, RA is saved first. A -8 offset means that after the
 //    stack pointer subtration, the first register in the mask (RA) will be
 //    saved at address 48-8=40.
 //
@@ -122,7 +123,7 @@ namespace {
 // Mask directives
 //===----------------------------------------------------------------------===//
 
-// Create a bitmask with all callee saved registers for CPU or Floating Point 
+// Create a bitmask with all callee saved registers for CPU or Floating Point
 // registers. For CPU registers consider RA, GP and FP for saving if necessary.
 void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   const TargetFrameLowering *TFI = TM.getFrameLowering();
@@ -168,7 +169,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
 // Print a 32 bit hex number with all numbers.
 void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
   O << "0x";
-  for (int i = 7; i >= 0; i--) 
+  for (int i = 7; i >= 0; i--)
     O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
 }
 
@@ -191,9 +192,9 @@ void MipsAsmPrinter::emitFrameDirective() {
 }
 
 /// Emit Set directives.
-const char *MipsAsmPrinter::getCurrentABIString() const { 
+const char *MipsAsmPrinter::getCurrentABIString() const {
   switch (Subtarget->getTargetABI()) {
-  case MipsSubtarget::O32:  return "abi32";  
+  case MipsSubtarget::O32:  return "abi32";
   case MipsSubtarget::O64:  return "abiO64";
   case MipsSubtarget::N32:  return "abiN32";
   case MipsSubtarget::N64:  return "abi64";
@@ -203,7 +204,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
 
   llvm_unreachable("Unknown Mips ABI");
   return NULL;
-}  
+}
 
 void MipsAsmPrinter::EmitFunctionEntryLabel() {
   OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
@@ -214,7 +215,7 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
 /// the first basic block in the function.
 void MipsAsmPrinter::EmitFunctionBodyStart() {
   emitFrameDirective();
-  
+
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
   printSavedRegsBitmask(OS);
@@ -226,7 +227,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
 void MipsAsmPrinter::EmitFunctionBodyEnd() {
   // There are instruction for this macros, but they must
   // always be at the function end, and we can't emit and
-  // break with BB logic. 
+  // break with BB logic.
   OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
   OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
   OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
@@ -236,8 +237,8 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
 /// isBlockOnlyReachableByFallthough - Return true if the basic block has
 /// exactly one predecessor and the control transfer mechanism between
 /// the predecessor and this block is a fall-through.
-bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) 
-    const {
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+                                                       MBB) const {
   // The predecessor has to be immediately before this block.
   const MachineBasicBlock *Pred = *MBB->pred_begin();
 
@@ -246,16 +247,41 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *
   if (const BasicBlock *bb = Pred->getBasicBlock())
     if (isa<SwitchInst>(bb->getTerminator()))
       return false;
+
+  // If this is a landing pad, it isn't a fall through.  If it has no preds,
+  // then nothing falls through to it.
+  if (MBB->isLandingPad() || MBB->pred_empty())
+    return false;
+
+  // If there isn't exactly one predecessor, it can't be a fall through.
+  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+  ++PI2;
+ 
+  if (PI2 != MBB->pred_end())
+    return false;  
+
+  // The predecessor has to be immediately before this block.
+  if (!Pred->isLayoutSuccessor(MBB))
+    return false;
+   
+  // If the block is completely empty, then it definitely does fall through.
+  if (Pred->empty())
+    return true;
   
-  return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+  // Otherwise, check the last instruction.
+  // Check if the last terminator is an unconditional branch.
+  MachineBasicBlock::const_iterator I = Pred->end();
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+
+  return !I->getDesc().isBarrier();
 }
 
 // Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                      unsigned AsmVariant,const char *ExtraCode,
                                      raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0]) 
+  if (ExtraCode && ExtraCode[0])
     return true; // Unknown modifier.
 
   printOperand(MI, OpNo, O);
@@ -273,22 +299,9 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   switch(MO.getTargetFlags()) {
   case MipsII::MO_GPREL:    O << "%gp_rel("; break;
   case MipsII::MO_GOT_CALL: O << "%call16("; break;
-  case MipsII::MO_GOT: {
-    const MachineOperand &LastMO = MI->getOperand(opNum-1);
-    bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register
-                      && LastMO.getReg() == Mips::GP;
-    if (MI->getOpcode() == Mips::LW || LastMOIsGP)
-      O << "%got(";
-    else
-      O << "%lo(";
-    break;
-  }
-  case MipsII::MO_ABS_HILO:
-    if (MI->getOpcode() == Mips::LUi)
-      O << "%hi(";
-    else
-      O << "%lo(";     
-    break;
+  case MipsII::MO_GOT:      O << "%got(";    break;
+  case MipsII::MO_ABS_HI:   O << "%hi(";     break;
+  case MipsII::MO_ABS_LO:   O << "%lo(";     break;
   }
 
   switch (MO.getType()) {
@@ -308,6 +321,12 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       O << *Mang->getSymbol(MO.getGlobal());
       break;
 
+    case MachineOperand::MO_BlockAddress: {
+      MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+      O << BA->getName();
+      break;
+    }
+
     case MachineOperand::MO_ExternalSymbol:
       O << *GetExternalSymbolSymbol(MO.getSymbolName());
       break;
@@ -323,7 +342,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       if (MO.getOffset())
         O << "+" << MO.getOffset();
       break;
-  
+
     default:
       llvm_unreachable("<unknown operand type>");
   }
@@ -336,7 +355,7 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
   const MachineOperand &MO = MI->getOperand(opNum);
   if (MO.isImm())
     O << (unsigned short int)MO.getImm();
-  else 
+  else
     printOperand(MI, opNum, O);
 }
 
@@ -352,8 +371,8 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
     return;
   }
 
-  // Load/Store memory operands -- imm($reg) 
-  // If PIC target the target is loaded as the 
+  // Load/Store memory operands -- imm($reg)
+  // If PIC target the target is loaded as the
   // pattern lw $25,%call16($28)
   printOperand(MI, opNum, O);
   O << "(";
@@ -365,12 +384,12 @@ void MipsAsmPrinter::
 printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                 const char *Modifier) {
   const MachineOperand& MO = MI->getOperand(opNum);
-  O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); 
+  O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
 }
 
 void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   // FIXME: Use SwitchSection.
-  
+
   // Tell the assembler which ABI we are using
   OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
 
@@ -383,11 +402,11 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   }
 
   // return to previous section
-  OutStreamer.EmitRawText(StringRef("\t.previous")); 
+  OutStreamer.EmitRawText(StringRef("\t.previous"));
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializeMipsAsmPrinter() { 
+extern "C" void LLVMInitializeMipsAsmPrinter() {
   RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
   RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
 }
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 8f313ef..57aeb1d 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,23 +1,23 @@
 //===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for Mips architecture.
 //===----------------------------------------------------------------------===//
 
 /// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>: 
+class CCIfSubtarget<string F, CCAction A>:
   CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
 
 //===----------------------------------------------------------------------===//
 // Mips O32 Calling Convention
 //===----------------------------------------------------------------------===//
 
-// Only the return rules are defined here for O32. The rules for argument 
+// Only the return rules are defined here for O32. The rules for argument
 // passing are defined in MipsISelLowering.cpp.
 def RetCC_MipsO32 : CallingConv<[
   // i32 are returned in registers V0, V1
@@ -41,15 +41,15 @@ def CC_MipsEABI : CallingConv<[
   // Integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
 
-  // Single fp arguments are passed in pairs within 32-bit mode 
-  CCIfType<[f32], CCIfSubtarget<"isSingleFloat()", 
+  // Single fp arguments are passed in pairs within 32-bit mode
+  CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
                   CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
 
-  CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()", 
+  CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
                   CCAssignToReg<[F12, F14, F16, F18]>>>,
 
-  // The first 4 doubl fp arguments are passed in single fp registers.
-  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", 
+  // The first 4 double fp arguments are passed in single fp registers.
+  CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
                   CCAssignToReg<[D6, D7, D8, D9]>>>,
 
   // Integer values get stored in stack slots that are 4 bytes in
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
new file mode 100644
index 0000000..4423f51
--- /dev/null
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,117 @@
+//===--  MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands pseudo instructions into target instructions after register
+// allocation but before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-expand-pseudo"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+  struct MipsExpandPseudo : public MachineFunctionPass {
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+
+    static char ID;
+    MipsExpandPseudo(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+    virtual const char *getPassName() const {
+      return "Mips PseudoInstrs Expansion";
+    }
+
+    bool runOnMachineFunction(MachineFunction &F);
+    bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+  private:
+    void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
+    void ExpandExtractElementF64(MachineBasicBlock&,
+                                 MachineBasicBlock::iterator);
+  };
+  char MipsExpandPseudo::ID = 0;
+} // end of anonymous namespace
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
+  bool Changed = false;
+
+  for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
+    Changed |= runOnMachineBasicBlock(*I);
+
+  return Changed;
+}
+
+bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
+
+  bool Changed = false;
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+    const TargetInstrDesc& Tid = I->getDesc();
+
+    switch(Tid.getOpcode()) {
+    default: 
+      ++I;
+      continue;
+    case Mips::BuildPairF64:
+      ExpandBuildPairF64(MBB, I);
+      break;
+    case Mips::ExtractElementF64:
+      ExpandExtractElementF64(MBB, I);
+      break;
+    } 
+
+    // delete original instr
+    MBB.erase(I++);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
+                                            MachineBasicBlock::iterator I) {  
+  unsigned DstReg = I->getOperand(0).getReg();
+  unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+  const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+  DebugLoc dl = I->getDebugLoc();
+  const unsigned* SubReg =
+    TM.getRegisterInfo()->getSubRegisters(DstReg);
+
+  // mtc1 Lo, $fp
+  // mtc1 Hi, $fp + 1
+  BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
+  BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
+}
+
+void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
+                                               MachineBasicBlock::iterator I) {
+  unsigned DstReg = I->getOperand(0).getReg();
+  unsigned SrcReg = I->getOperand(1).getReg();
+  unsigned N = I->getOperand(2).getImm();
+  const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+  DebugLoc dl = I->getDebugLoc();
+  const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+
+  BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
+}
+
+/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo 
+/// instrs into real instrs
+FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
+  return new MipsExpandPseudo(tm);
+}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 87a097a..21e3314 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -203,6 +203,46 @@ void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const {
     MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
 }
 
+
+// expand pair of register and immediate if the immediate doesn't fit in the
+// 16-bit offset field.
+// e.g.
+//  if OrigImm = 0x10000, OrigReg = $sp:
+//    generate the following sequence of instrs:
+//      lui  $at, hi(0x10000)
+//      addu $at, $sp, $at
+//
+//    (NewReg, NewImm) = ($at, lo(Ox10000))
+//    return true
+static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
+                                  unsigned& NewReg, int& NewImm,
+                                  MachineBasicBlock& MBB,
+                                  MachineBasicBlock::iterator I) {
+  // OrigImm fits in the 16-bit field
+  if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+    NewReg = OrigReg;
+    NewImm = OrigImm;
+    return false;
+  }
+
+  MachineFunction* MF = MBB.getParent();
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  DebugLoc DL = I->getDebugLoc();
+  int ImmLo = OrigImm & 0xffff;
+  int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+              ((OrigImm & 0x8000) != 0);
+
+  // FIXME: change this when mips goes MC".
+  BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
+  BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+  BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+                                                     .addReg(Mips::AT);
+  NewReg = Mips::AT;
+  NewImm = ImmLo;
+
+  return true;
+}
+
 void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB   = MF.front();
   MachineFrameInfo *MFI    = MF.getFrameInfo();
@@ -214,6 +254,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+  unsigned NewReg = 0;
+  int NewImm = 0;
+  bool ATUsed;
 
   // Get the right frame order for Mips.
   adjustMipsStackFrame(MF);
@@ -236,22 +279,40 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
   BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
 
   // Adjust stack : addi sp, sp, (-imm)
+  ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
+                                 MBBI);
   BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
-      .addReg(Mips::SP).addImm(-StackSize);
+    .addReg(NewReg).addImm(NewImm);
+
+  // FIXME: change this when mips goes MC".
+  if (ATUsed)
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
 
-  // Save the return address only if the function isnt a leaf one.
+  // Save the return address only if the function isn't a leaf one.
   // sw  $ra, stack_loc($sp)
   if (MFI->adjustsStack()) {
+    ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB,
+                                   MBBI);
     BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
-        .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+      .addReg(Mips::RA).addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC".
+    if (ATUsed)
+      BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
   }
 
   // if framepointer enabled, save it and set it
   // to point to the stack pointer
   if (hasFP(MF)) {
     // sw  $fp,stack_loc($sp)
+    ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB,
+                                   MBBI);
     BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
-      .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+      .addReg(Mips::FP).addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC".
+    if (ATUsed)
+      BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
 
     // move $fp, $sp
     BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
@@ -280,6 +341,10 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
   int FPOffset = MipsFI->getFPStackOffset();
   int RAOffset = MipsFI->getRAStackOffset();
 
+  unsigned NewReg = 0;
+  int NewImm = 0;
+  bool ATUsed = false;
+
   // if framepointer enabled, restore it and restore the
   // stack pointer
   if (hasFP(MF)) {
@@ -288,21 +353,39 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
       .addReg(Mips::FP).addReg(Mips::ZERO);
 
     // lw  $fp,stack_loc($sp)
+    ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB,
+                                   MBBI);
     BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
-      .addImm(FPOffset).addReg(Mips::SP);
+      .addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC".
+    if (ATUsed)
+      BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
   }
 
-  // Restore the return address only if the function isnt a leaf one.
+  // Restore the return address only if the function isn't a leaf one.
   // lw  $ra, stack_loc($sp)
   if (MFI->adjustsStack()) {
+    ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB,
+                                   MBBI);
     BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
-      .addImm(RAOffset).addReg(Mips::SP);
+      .addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC".
+    if (ATUsed)
+      BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
   }
 
   // adjust stack  : insert addi sp, sp, (imm)
   if (NumBytes) {
+    ATUsed = expandRegLargeImmPair(Mips::SP, NumBytes, NewReg, NewImm, MBB,
+                                   MBBI);
     BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
-      .addReg(Mips::SP).addImm(NumBytes);
+      .addReg(NewReg).addImm(NewImm);
+
+    // FIXME: change this when mips goes MC".
+    if (ATUsed)
+      BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
   }
 }
 
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index a8426c1..34647df 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
 
 #include "Mips.h"
 #include "MipsSubtarget.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 755e04d..0382964 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -52,19 +52,19 @@ class MipsDAGToDAGISel : public SelectionDAGISel {
   /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
   /// make the right decision when generating code for different targets.
   const MipsSubtarget &Subtarget;
- 
+
 public:
   explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
   SelectionDAGISel(tm),
   TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-  
+
   // Pass Name
   virtual const char *getPassName() const {
     return "MIPS DAG->DAG Pattern Instruction Selection";
-  } 
-  
+  }
 
-private:  
+
+private:
   // Include the pieces autogenerated from the target description.
   #include "MipsGenDAGISel.inc"
 
@@ -116,12 +116,14 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
-    
+
   // on PIC code Load GA
   if (TM.getRelocationModel() == Reloc::PIC_) {
-    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || 
-        (Addr.getOpcode() == ISD::TargetConstantPool) || 
-        (Addr.getOpcode() == ISD::TargetJumpTable)){
+    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+        (Addr.getOpcode() == ISD::TargetConstantPool) ||
+        (Addr.getOpcode() == ISD::TargetJumpTable) ||
+        (Addr.getOpcode() == ISD::TargetBlockAddress) ||
+        (Addr.getOpcode() == ISD::TargetExternalSymbol)) {
       Base   = CurDAG->getRegister(Mips::GP, MVT::i32);
       Offset = Addr;
       return true;
@@ -130,8 +132,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
     if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
         Addr.getOpcode() == ISD::TargetGlobalAddress))
       return false;
-  }    
-  
+  }
+
   // Operand is a result from an ADD.
   if (Addr.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
@@ -158,10 +160,10 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
     // Generate:
     //  lui $2, %hi($CPI1_0)
     //  lwc1 $f0, %lo($CPI1_0)($2)
-    if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || 
+    if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
          Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
         Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
-      SDValue LoVal = Addr.getOperand(1); 
+      SDValue LoVal = Addr.getOperand(1);
       if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
         Base = Addr.getOperand(0);
         Offset = LoVal.getOperand(0);
@@ -176,7 +178,7 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
 }
 
 SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
-  MVT::SimpleValueType NVT = 
+  MVT::SimpleValueType NVT =
     N->getValueType(0).getSimpleVT().SimpleTy;
 
   if (!Subtarget.isMips1() || NVT != MVT::f64)
@@ -199,14 +201,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   DebugLoc dl = N->getDebugLoc();
 
-  // The second load should start after for 4 bytes. 
+  // The second load should start after for 4 bytes.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
     Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
   else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
-    Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), 
-                                            MVT::i32, 
-                                            CP->getAlignment(), 
-                                            CP->getOffset()+4, 
+    Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
+                                            MVT::i32,
+                                            CP->getAlignment(),
+                                            CP->getOffset()+4,
                                             CP->getTargetFlags());
   else
     return NULL;
@@ -220,16 +222,16 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
   // Generate:
   //    lwc $f0, X($3)
   //    lwc $f1, X+4($3)
-  SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, 
+  SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
                                     MVT::Other, Offset0, Base, Chain);
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
-  SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, 
+  SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
                             MVT::f64, Undef, SDValue(LD0, 0));
 
   SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
                           MVT::Other, Offset1, Base, SDValue(LD0, 1));
-  SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, 
+  SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
                             MVT::f64, I0, SDValue(LD1, 0));
 
   ReplaceUses(SDValue(N, 0), I1);
@@ -241,7 +243,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
 
 SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
 
-  if (!Subtarget.isMips1() || 
+  if (!Subtarget.isMips1() ||
       N->getOperand(1).getValueType() != MVT::f64)
     return NULL;
 
@@ -265,12 +267,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
   // Get the even and odd part from the f64 register
-  SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, 
+  SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
                                                  dl, MVT::f32, N1);
   SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
                                                  dl, MVT::f32, N1);
 
-  // The second store should start after for 4 bytes. 
+  // The second store should start after for 4 bytes.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
     Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
   else
@@ -315,26 +317,26 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   }
 
   ///
-  // Instruction Selection not handled by the auto-generated 
+  // Instruction Selection not handled by the auto-generated
   // tablegen selection should be handled here.
-  /// 
+  ///
   switch(Opcode) {
 
     default: break;
 
-    case ISD::SUBE: 
+    case ISD::SUBE:
     case ISD::ADDE: {
       SDValue InFlag = Node->getOperand(2), CmpLHS;
       unsigned Opc = InFlag.getOpcode(); (void)Opc;
-      assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || 
-              (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&  
+      assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+              (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
              "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
 
       unsigned MOp;
       if (Opcode == ISD::ADDE) {
         CmpLHS = InFlag.getValue(0);
         MOp = Mips::ADDu;
-      } else { 
+      } else {
         CmpLHS = InFlag.getOperand(0);
         MOp = Mips::SUBu;
       }
@@ -346,7 +348,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 
       EVT VT = LHS.getValueType();
       SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
-      SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, 
+      SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
                                                 SDValue(Carry,0), RHS);
 
       return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
@@ -356,36 +358,34 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     /// Mul/Div with two results
     case ISD::SDIVREM:
     case ISD::UDIVREM:
+      break;
     case ISD::SMUL_LOHI:
     case ISD::UMUL_LOHI: {
       SDValue Op1 = Node->getOperand(0);
       SDValue Op2 = Node->getOperand(1);
 
       unsigned Op;
-      if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI)
-        Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
-      else
-        Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
+      Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
 
-      SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+      SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
 
-      SDValue InFlag = SDValue(MulDiv, 0);
-      SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, 
+      SDValue InFlag = SDValue(Mul, 0);
+      SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
                                           MVT::Glue, InFlag);
       InFlag = SDValue(Lo,1);
       SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
 
-      if (!SDValue(Node, 0).use_empty()) 
+      if (!SDValue(Node, 0).use_empty())
         ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
 
-      if (!SDValue(Node, 1).use_empty()) 
+      if (!SDValue(Node, 1).use_empty())
         ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
 
       return NULL;
     }
 
     /// Special Muls
-    case ISD::MUL: 
+    case ISD::MUL:
       if (Subtarget.isMips32())
         break;
     case ISD::MULHS:
@@ -394,7 +394,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
       SDValue MulOp2 = Node->getOperand(1);
 
       unsigned MulOp  = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
-      SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, 
+      SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
                                                MVT::Glue, MulOp1, MulOp2);
 
       SDValue InFlag = SDValue(MulNode, 0);
@@ -408,24 +408,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     /// Div/Rem operations
     case ISD::SREM:
     case ISD::UREM:
-    case ISD::SDIV: 
-    case ISD::UDIV: {
-      SDValue Op1 = Node->getOperand(0);
-      SDValue Op2 = Node->getOperand(1);
-
-      unsigned Op, MOp;
-      if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) {
-        Op  = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
-        MOp = Mips::MFLO;
-      } else {
-        Op  = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
-        MOp = Mips::MFHI;
-      }
-      SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
-
-      SDValue InFlag = SDValue(Node, 0);
-      return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag);
-    }
+    case ISD::SDIV:
+    case ISD::UDIV:
+      break;
 
     // Get target GOT address.
     case ISD::GLOBAL_OFFSET_TABLE:
@@ -433,15 +418,15 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 
     case ISD::ConstantFP: {
       ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
-      if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { 
-        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 
+      if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                         Mips::ZERO, MVT::i32);
         SDValue Undef = SDValue(
           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
         SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
-        SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, 
+        SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
                             MVT::f64, Undef, SDValue(MTC, 0));
-        SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, 
+        SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
                             MVT::f64, I0, SDValue(MTC, 0));
         ReplaceUses(SDValue(Node, 0), I1);
         return I1.getNode();
@@ -460,61 +445,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
         return ResNode;
       // Other cases are autogenerated.
       break;
-
-    /// Handle direct and indirect calls when using PIC. On PIC, when 
-    /// GOT is smaller than about 64k (small code) the GA target is 
-    /// loaded with only one instruction. Otherwise GA's target must 
-    /// be loaded with 3 instructions. 
-    case MipsISD::JmpLink: {
-      if (TM.getRelocationModel() == Reloc::PIC_) {
-        unsigned LastOpNum = Node->getNumOperands()-1;
-
-        SDValue Chain  = Node->getOperand(0);
-        SDValue Callee = Node->getOperand(1);
-        SDValue InFlag;
-
-        // Skip the incomming flag if present
-        if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue)
-          LastOpNum--;
-
-        if ( (isa<GlobalAddressSDNode>(Callee)) ||
-             (isa<ExternalSymbolSDNode>(Callee)) )
-        {
-          /// Direct call for global addresses and external symbols
-          SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32);
-
-          // Use load to get GOT target
-          SDValue Ops[] = { Callee, GPReg, Chain };
-          SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32, 
-                                     MVT::Other, Ops, 3), 0);
-          Chain = Load.getValue(1);
-
-          // Call target must be on T9
-          Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Load, InFlag);
-        } else 
-          /// Indirect call
-          Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag);
-
-        // Map the JmpLink operands to JALR
-        SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        SmallVector<SDValue, 8> Ops;
-        Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32));
-
-        for (unsigned i = 2, e = LastOpNum+1; i != e; ++i)
-          Ops.push_back(Node->getOperand(i));
-        Ops.push_back(Chain);
-        Ops.push_back(Chain.getValue(1));
-
-        // Emit Jump and Link Register
-        SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, NodeTys, 
-                                  &Ops[0], Ops.size());
-
-        // Replace Chain and InFlag
-        ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
-        ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 1));
-        return ResNode;
-      } 
-    }
   }
 
   // Select the default instruction
@@ -529,7 +459,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   return ResNode;
 }
 
-/// createMipsISelDag - This pass converts a legalized DAG into a 
+/// createMipsISelDag - This pass converts a legalized DAG into a
 /// MIPS-specific DAG, ready for instruction scheduling.
 FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
   return new MipsDAGToDAGISel(TM);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 1d7a1c0..1f1220f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -41,15 +41,19 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
     case MipsISD::Lo         : return "MipsISD::Lo";
     case MipsISD::GPRel      : return "MipsISD::GPRel";
     case MipsISD::Ret        : return "MipsISD::Ret";
-    case MipsISD::SelectCC   : return "MipsISD::SelectCC";
-    case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
     case MipsISD::FPBrcond   : return "MipsISD::FPBrcond";
     case MipsISD::FPCmp      : return "MipsISD::FPCmp";
+    case MipsISD::CMovFP_T   : return "MipsISD::CMovFP_T";
+    case MipsISD::CMovFP_F   : return "MipsISD::CMovFP_F";
     case MipsISD::FPRound    : return "MipsISD::FPRound";
     case MipsISD::MAdd       : return "MipsISD::MAdd";
     case MipsISD::MAddu      : return "MipsISD::MAddu";
     case MipsISD::MSub       : return "MipsISD::MSub";
     case MipsISD::MSubu      : return "MipsISD::MSubu";
+    case MipsISD::DivRem     : return "MipsISD::DivRem";
+    case MipsISD::DivRemU    : return "MipsISD::DivRemU";
+    case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+    case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
     default                  : return NULL;
   }
 }
@@ -89,25 +93,22 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   // Mips Custom Operations
   setOperationAction(ISD::GlobalAddress,      MVT::i32,   Custom);
+  setOperationAction(ISD::BlockAddress,       MVT::i32,   Custom);
   setOperationAction(ISD::GlobalTLSAddress,   MVT::i32,   Custom);
   setOperationAction(ISD::JumpTable,          MVT::i32,   Custom);
   setOperationAction(ISD::ConstantPool,       MVT::i32,   Custom);
   setOperationAction(ISD::SELECT,             MVT::f32,   Custom);
   setOperationAction(ISD::SELECT,             MVT::f64,   Custom);
   setOperationAction(ISD::SELECT,             MVT::i32,   Custom);
-  setOperationAction(ISD::SETCC,              MVT::f32,   Custom);
-  setOperationAction(ISD::SETCC,              MVT::f64,   Custom);
   setOperationAction(ISD::BRCOND,             MVT::Other, Custom);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Custom);
   setOperationAction(ISD::FP_TO_SINT,         MVT::i32,   Custom);
   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 
-
-  // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
-  // with operands comming from setcc fp comparions. This is necessary since
-  // the result from these setcc are in a flag registers (FCR31).
-  setOperationAction(ISD::AND,              MVT::i32,   Custom);
-  setOperationAction(ISD::OR,               MVT::i32,   Custom);
+  setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
 
   // Operations not directly supported by Mips.
   setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
@@ -129,7 +130,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::FCOPYSIGN,         MVT::f32,   Expand);
   setOperationAction(ISD::FCOPYSIGN,         MVT::f64,   Expand);
   setOperationAction(ISD::FSIN,              MVT::f32,   Expand);
+  setOperationAction(ISD::FSIN,              MVT::f64,   Expand);
   setOperationAction(ISD::FCOS,              MVT::f32,   Expand);
+  setOperationAction(ISD::FCOS,              MVT::f64,   Expand);
   setOperationAction(ISD::FPOWI,             MVT::f32,   Expand);
   setOperationAction(ISD::FPOW,              MVT::f32,   Expand);
   setOperationAction(ISD::FLOG,              MVT::f32,   Expand);
@@ -139,6 +142,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   setOperationAction(ISD::EH_LABEL,          MVT::Other, Expand);
 
+  setOperationAction(ISD::VAARG,             MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY,            MVT::Other, Expand);
+  setOperationAction(ISD::VAEND,             MVT::Other, Expand);
+
   // Use the default for now
   setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
@@ -160,6 +167,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   setTargetDAGCombine(ISD::ADDE);
   setTargetDAGCombine(ISD::SUBE);
+  setTargetDAGCombine(ISD::SDIVREM);
+  setTargetDAGCombine(ISD::UDIVREM);
+  setTargetDAGCombine(ISD::SETCC);
 
   setStackPointerRegisterToSaveRestore(Mips::SP);
   computeRegisterProperties();
@@ -181,7 +191,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
 //  multHi/Lo: product of multiplication
 //  Lo0: initial value of Lo register
 //  Hi0: initial value of Hi register
-// Return true if mattern matching was successful.
+// Return true if pattern matching was successful.
 static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
   // ADDENode's second operand must be a flag output of an ADDC node in order
   // for the matching to be successful.
@@ -255,7 +265,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
 //  multHi/Lo: product of multiplication
 //  Lo0: initial value of Lo register
 //  Hi0: initial value of Hi register
-// Return true if mattern matching was successful.
+// Return true if pattern matching was successful.
 static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
   // SUBENode's second operand must be a flag output of an SUBC node in order
   // for the matching to be successful.
@@ -346,6 +356,130 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
   return SDValue();
 }
 
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const MipsSubtarget* Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+                                                  MipsISD::DivRemU;
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+                               N->getOperand(0), N->getOperand(1));
+  SDValue InChain = DAG.getEntryNode();
+  SDValue InGlue = DivRem;
+
+  // insert MFLO
+  if (N->hasAnyUseOfValue(0)) {
+    SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+                                            InGlue);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+    InChain = CopyFromLo.getValue(1);
+    InGlue = CopyFromLo.getValue(2);
+  }
+
+  // insert MFHI
+  if (N->hasAnyUseOfValue(1)) {
+    SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+                                               Mips::HI, MVT::i32, InGlue);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+  }
+
+  return SDValue();
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown fp condition code!");
+  case ISD::SETEQ:
+  case ISD::SETOEQ: return Mips::FCOND_OEQ;
+  case ISD::SETUNE: return Mips::FCOND_UNE;
+  case ISD::SETLT:
+  case ISD::SETOLT: return Mips::FCOND_OLT;
+  case ISD::SETGT:
+  case ISD::SETOGT: return Mips::FCOND_OGT;
+  case ISD::SETLE:
+  case ISD::SETOLE: return Mips::FCOND_OLE;
+  case ISD::SETGE:
+  case ISD::SETOGE: return Mips::FCOND_OGE;
+  case ISD::SETULT: return Mips::FCOND_ULT;
+  case ISD::SETULE: return Mips::FCOND_ULE;
+  case ISD::SETUGT: return Mips::FCOND_UGT;
+  case ISD::SETUGE: return Mips::FCOND_UGE;
+  case ISD::SETUO:  return Mips::FCOND_UN;
+  case ISD::SETO:   return Mips::FCOND_OR;
+  case ISD::SETNE:
+  case ISD::SETONE: return Mips::FCOND_ONE;
+  case ISD::SETUEQ: return Mips::FCOND_UEQ;
+  }
+}
+
+
+// Returns true if condition code has to be inverted.
+static bool InvertFPCondCode(Mips::CondCode CC) {
+  if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+    return false;
+
+  if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+    return true;
+
+  assert(false && "Illegal Condition Code");
+  return false;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+  // must be a SETCC node
+  if (Op.getOpcode() != ISD::SETCC)
+    return Op;
+
+  SDValue LHS = Op.getOperand(0);
+
+  if (!LHS.getValueType().isFloatingPoint())
+    return Op;
+
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+  // node if necessary.
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+  return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+                     DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFPT/F node.
+static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+                            SDValue False, DebugLoc DL) {
+  bool invert = InvertFPCondCode((Mips::CondCode)
+                                 cast<ConstantSDNode>(Cond.getOperand(2))
+                                 ->getSExtValue());
+
+  return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+                     True.getValueType(), True, False, Cond);
+}
+
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const MipsSubtarget* Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+
+  if (Cond.getOpcode() != MipsISD::FPCmp)
+    return SDValue();
+
+  SDValue True  = DAG.getConstant(1, MVT::i32);
+  SDValue False = DAG.getConstant(0, MVT::i32);
+
+  return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+}
+
 SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
   const {
   SelectionDAG &DAG = DCI.DAG;
@@ -357,6 +491,11 @@ SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
     return PerformADDECombine(N, DAG, DCI, Subtarget);
   case ISD::SUBE:
     return PerformSUBECombine(N, DAG, DCI, Subtarget);
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+  case ISD::SETCC:
+    return PerformSETCCCombine(N, DAG, DCI, Subtarget);
   }
 
   return SDValue();
@@ -367,17 +506,15 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
 {
   switch (Op.getOpcode())
   {
-    case ISD::AND:                return LowerANDOR(Op, DAG);
     case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
     case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
     case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
     case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
+    case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
     case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
     case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
-    case ISD::OR:                 return LowerANDOR(Op, DAG);
     case ISD::SELECT:             return LowerSELECT(Op, DAG);
-    case ISD::SETCC:              return LowerSETCC(Op, DAG);
     case ISD::VASTART:            return LowerVASTART(Op, DAG);
   }
   return SDValue();
@@ -410,122 +547,110 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
   return Mips::BRANCH_INVALID;
 }
 
-static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
-  switch(BC) {
-    default:
-      llvm_unreachable("Unknown branch code");
-    case Mips::BRANCH_T  : return Mips::BC1T;
-    case Mips::BRANCH_F  : return Mips::BC1F;
-    case Mips::BRANCH_TL : return Mips::BC1TL;
-    case Mips::BRANCH_FL : return Mips::BC1FL;
-  }
-}
-
-static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown fp condition code!");
-  case ISD::SETEQ:
-  case ISD::SETOEQ: return Mips::FCOND_EQ;
-  case ISD::SETUNE: return Mips::FCOND_OGL;
-  case ISD::SETLT:
-  case ISD::SETOLT: return Mips::FCOND_OLT;
-  case ISD::SETGT:
-  case ISD::SETOGT: return Mips::FCOND_OGT;
-  case ISD::SETLE:
-  case ISD::SETOLE: return Mips::FCOND_OLE;
-  case ISD::SETGE:
-  case ISD::SETOGE: return Mips::FCOND_OGE;
-  case ISD::SETULT: return Mips::FCOND_ULT;
-  case ISD::SETULE: return Mips::FCOND_ULE;
-  case ISD::SETUGT: return Mips::FCOND_UGT;
-  case ISD::SETUGE: return Mips::FCOND_UGE;
-  case ISD::SETUO:  return Mips::FCOND_UN;
-  case ISD::SETO:   return Mips::FCOND_OR;
-  case ISD::SETNE:
-  case ISD::SETONE: return Mips::FCOND_NEQ;
-  case ISD::SETUEQ: return Mips::FCOND_UEQ;
-  }
-}
-
 MachineBasicBlock *
 MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                 MachineBasicBlock *BB) const {
+  // There is no need to expand CMov instructions if target has
+  // conditional moves.
+  if (Subtarget->hasCondMov())
+    return BB;
+
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   bool isFPCmp = false;
   DebugLoc dl = MI->getDebugLoc();
+  unsigned Opc;
 
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
-  case Mips::Select_FCC:
-  case Mips::Select_FCC_S32:
-  case Mips::Select_FCC_D32:
-    isFPCmp = true; // FALL THROUGH
-  case Mips::Select_CC:
-  case Mips::Select_CC_S32:
-  case Mips::Select_CC_D32: {
-    // To "insert" a SELECT_CC instruction, we actually have to insert the
-    // diamond control-flow pattern.  The incoming instruction knows the
-    // destination vreg to set, the condition code register to branch on, the
-    // true/false values to select between, and a branch opcode to use.
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
-
-    //  thisMBB:
-    //  ...
-    //   TrueVal = ...
-    //   setcc r1, r2, r3
-    //   bNE   r1, r0, copy1MBB
-    //   fallthrough --> copy0MBB
-    MachineBasicBlock *thisMBB  = BB;
-    MachineFunction *F = BB->getParent();
-    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
-    F->insert(It, copy0MBB);
-    F->insert(It, sinkMBB);
-
-    // Transfer the remainder of BB and its successor edges to sinkMBB.
-    sinkMBB->splice(sinkMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
-    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    // Next, add the true and fallthrough blocks as its successors.
-    BB->addSuccessor(copy0MBB);
-    BB->addSuccessor(sinkMBB);
-
-    // Emit the right instruction according to the type of the operands compared
-    if (isFPCmp) {
-      // Find the condiction code present in the setcc operation.
-      Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm();
-      // Get the branch opcode from the branch code.
-      unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC));
-      BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
-    } else
-      BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
-        .addReg(Mips::ZERO).addMBB(sinkMBB);
-
-    //  copy0MBB:
-    //   %FalseValue = ...
-    //   # fallthrough to sinkMBB
-    BB = copy0MBB;
-
-    // Update machine-CFG edges
-    BB->addSuccessor(sinkMBB);
-
-    //  sinkMBB:
-    //   %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
-    //  ...
-    BB = sinkMBB;
+  case Mips::MOVT:
+  case Mips::MOVT_S:
+  case Mips::MOVT_D:
+    isFPCmp = true;
+    Opc = Mips::BC1F;
+    break;
+  case Mips::MOVF:
+  case Mips::MOVF_S:
+  case Mips::MOVF_D:
+    isFPCmp = true;
+    Opc = Mips::BC1T;
+    break;
+  case Mips::MOVZ_I:
+  case Mips::MOVZ_S:
+  case Mips::MOVZ_D:
+    Opc = Mips::BNE;
+    break;
+  case Mips::MOVN_I:
+  case Mips::MOVN_S:
+  case Mips::MOVN_D:
+    Opc = Mips::BEQ;
+    break;
+  }
+
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern.  The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  //  thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   setcc r1, r2, r3
+  //   bNE   r1, r0, copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB  = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
+
+  // Emit the right instruction according to the type of the operands compared
+  if (isFPCmp)
+    BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+  else
+    BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
+      .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+
+  //  copy0MBB:
+  //   %FalseValue = ...
+  //   # fallthrough to sinkMBB
+  BB = copy0MBB;
+
+  // Update machine-CFG edges
+  BB->addSuccessor(sinkMBB);
+
+  //  sinkMBB:
+  //   %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+  //  ...
+  BB = sinkMBB;
+
+  if (isFPCmp)
     BuildMI(*BB, BB->begin(), dl,
             TII->get(Mips::PHI), MI->getOperand(0).getReg())
       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
-      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
+      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+  else
+    BuildMI(*BB, BB->begin(), dl,
+            TII->get(Mips::PHI), MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
+      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
 
-    MI->eraseFromParent();   // The pseudo instruction is gone now.
-    return BB;
-  }
-  }
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  return BB;
 }
 
 //===----------------------------------------------------------------------===//
@@ -590,27 +715,6 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
 }
 
 SDValue MipsTargetLowering::
-LowerANDOR(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue LHS   = Op.getOperand(0);
-  SDValue RHS   = Op.getOperand(1);
-  DebugLoc dl   = Op.getDebugLoc();
-
-  if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp)
-    return Op;
-
-  SDValue True  = DAG.getConstant(1, MVT::i32);
-  SDValue False = DAG.getConstant(0, MVT::i32);
-
-  SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
-                             LHS, True, False, LHS.getOperand(2));
-  SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
-                             RHS, True, False, RHS.getOperand(2));
-
-  return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
-}
-
-SDValue MipsTargetLowering::
 LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
 {
   // The first operand is the chain, the second is the condition, the third is
@@ -619,58 +723,32 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
   SDValue Dest = Op.getOperand(2);
   DebugLoc dl = Op.getDebugLoc();
 
-  if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
+  SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+
+  // Return if flag is not set by a floating point comparison.
+  if (CondRes.getOpcode() != MipsISD::FPCmp)
     return Op;
 
-  SDValue CondRes = Op.getOperand(1);
   SDValue CCNode  = CondRes.getOperand(2);
   Mips::CondCode CC =
     (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
   SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
 
   return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
-             Dest, CondRes);
-}
-
-SDValue MipsTargetLowering::
-LowerSETCC(SDValue Op, SelectionDAG &DAG) const
-{
-  // The operands to this are the left and right operands to compare (ops #0,
-  // and #1) and the condition code to compare them with (op #2) as a
-  // CondCodeSDNode.
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  DebugLoc dl = Op.getDebugLoc();
-
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
-  return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
-                 DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+                     Dest, CondRes);
 }
 
 SDValue MipsTargetLowering::
 LowerSELECT(SDValue Op, SelectionDAG &DAG) const
 {
-  SDValue Cond  = Op.getOperand(0);
-  SDValue True  = Op.getOperand(1);
-  SDValue False = Op.getOperand(2);
-  DebugLoc dl = Op.getDebugLoc();
+  SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
 
-  // if the incomming condition comes from a integer compare, the select
-  // operation must be SelectCC or a conditional move if the subtarget
-  // supports it.
-  if (Cond.getOpcode() != MipsISD::FPCmp) {
-    if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
-      return Op;
-    return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
-                       Cond, True, False);
-  }
+  // Return if flag is not set by a floating point comparison.
+  if (Cond.getOpcode() != MipsISD::FPCmp)
+    return Op;
 
-  // if the incomming condition comes from fpcmp, the select
-  // operation must use FPSelectCC.
-  SDValue CCNode = Cond.getOperand(2);
-  return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
-                     Cond, True, False, CCNode);
+  return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+                      Op.getDebugLoc());
 }
 
 SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -693,12 +771,13 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
       return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
     }
     // %hi/%lo relocation
-    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                            MipsII::MO_ABS_HILO);
-    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
-    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+    SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                              MipsII::MO_ABS_HI);
+    SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                              MipsII::MO_ABS_LO);
+    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
     return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
   } else {
     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_GOT);
@@ -707,9 +786,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
                                   false, false, 0);
     // On functions and global targets not internal linked only
     // a load from got/GP is necessary for PIC to work.
-    if (!GV->hasLocalLinkage() || isa<Function>(GV))
+    if (!GV->hasInternalLinkage() &&
+        (!GV->hasLocalLinkage() || isa<Function>(GV)))
       return ResNode;
-    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+    SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                              MipsII::MO_ABS_LO);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
     return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
   }
 
@@ -717,6 +799,34 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
   return SDValue(0,0);
 }
 
+SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+    // %hi/%lo relocation
+    SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
+                                       MipsII::MO_ABS_HI);
+    SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
+                                       MipsII::MO_ABS_LO);
+    SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
+    return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+  }
+
+  SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+                                            MipsII::MO_GOT);
+  SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+                                           MipsII::MO_ABS_LO);
+  SDValue Load = DAG.getLoad(MVT::i32, dl,
+                             DAG.getEntryNode(), BAGOTOffset,
+                             MachinePointerInfo(), false, false, 0);
+  SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
+  return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+}
+
 SDValue MipsTargetLowering::
 LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
 {
@@ -732,7 +842,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
   // FIXME there isn't actually debug info here
   DebugLoc dl = Op.getDebugLoc();
   bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO;
+  unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
 
   EVT PtrVT = Op.getValueType();
   JumpTableSDNode *JT  = cast<JumpTableSDNode>(Op);
@@ -747,7 +857,9 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
                          MachinePointerInfo(),
                          false, false, 0);
 
-  SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
+  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                                         MipsII::MO_ABS_LO);
+  SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
   ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
 
   return ResNode;
@@ -764,7 +876,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
 
   // gp_rel relocation
   // FIXME: we should reference the constant pool using small data sections,
-  // but the asm printer currently doens't support this feature without
+  // but the asm printer currently doesn't support this feature without
   // hacking it. This feature should come soon so we can uncomment the
   // stuff below.
   //if (IsInSmallSection(C->getType())) {
@@ -773,18 +885,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
   //  ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
 
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
-    SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
-                                      N->getOffset(), MipsII::MO_ABS_HILO);
-    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
-    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+    SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+                                             N->getOffset(), MipsII::MO_ABS_HI);
+    SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+                                             N->getOffset(), MipsII::MO_ABS_LO);
+    SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
     ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
   } else {
     SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
-                                      N->getOffset(), MipsII::MO_GOT);
+                                           N->getOffset(), MipsII::MO_GOT);
     SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
                                CP, MachinePointerInfo::getConstantPool(),
                                false, false, 0);
-    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+    SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+                                             N->getOffset(), MipsII::MO_ABS_LO);
+    SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
     ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
   }
 
@@ -937,46 +1053,28 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT,
       LocInfo = CCValAssign::AExt;
   }
 
-  if (ValVT == MVT::i32 || ValVT == MVT::f32) {
-    if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) {
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
-      return false;
-    }
-    unsigned Off = State.AllocateStack(4, 4);
-    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo));
-    return false;
-  }
-
-  unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
-  if (ValVT == MVT::f64) {
-    if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) {
-      // A1 can't be used anymore, because 64 bit arguments
-      // must be aligned when copied back to the caller stack
-      State.AllocateReg(IntRegs, IntRegsSize);
-      UnallocIntReg++;
-    }
-
-    if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) ||
-        IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) {
-      unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize);
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
-      // Shadow the next register so it can be used
-      // later to get the other 32bit part.
-      State.AllocateReg(IntRegs, IntRegsSize);
-      return false;
-    }
+  unsigned Reg;
 
-    // Register is shadowed to preserve alignment, and the
-    // argument goes to a stack location.
-    if (UnallocIntReg != IntRegsSize)
-      State.AllocateReg(IntRegs, IntRegsSize);
+  if (ValVT == MVT::i32 || ValVT == MVT::f32) {
+    Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    LocVT = MVT::i32;
+  } else if (ValVT == MVT::f64) {
+    Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    if (Reg == Mips::A1 || Reg == Mips::A3)
+      Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    State.AllocateReg(IntRegs, IntRegsSize);
+    LocVT = MVT::i32;
+  } else
+    llvm_unreachable("Cannot handle this ValVT.");
 
-    unsigned Off = State.AllocateStack(8, 8);
-    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo));
-    return false;
-  }
+  if (!Reg) {
+    unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+    unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  } else
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 
-  return true; // CC didn't match
+  return false; // CC must always match
 }
 
 //===----------------------------------------------------------------------===//
@@ -1043,11 +1141,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
           Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
         if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
-          Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
-          SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
-                                   DAG.getConstant(0, getPointerTy()));
-          SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
-                                   DAG.getConstant(1, getPointerTy()));
+          SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+                                   Arg, DAG.getConstant(0, MVT::i32));
+          SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+                                   Arg, DAG.getConstant(1, MVT::i32));
+          if (!Subtarget->isLittle())
+            std::swap(Lo, Hi);
           RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
           RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
           continue;
@@ -1100,7 +1199,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag in necessary since all emitted instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -1113,12 +1212,52 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
   unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                getPointerTy(), 0, OpFlag);
-  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+  bool LoadSymAddr = false;
+  SDValue CalleeLo;
+
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                          getPointerTy(), 0,MipsII:: MO_GOT);
+      CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+                                            0, MipsII::MO_ABS_LO);
+    } else {
+      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                          getPointerTy(), 0, OpFlag);
+    }
+
+    LoadSymAddr = true;
+  }
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
                                 getPointerTy(), OpFlag);
+    LoadSymAddr = true;
+  }
+
+  // Create nodes that load address of callee and copy it to T9
+  if (IsPIC) {
+    if (LoadSymAddr) {
+      // Load callee address
+      SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee,
+                                      MachinePointerInfo::getGOT(),
+                                      false, false, 0);
+
+      // Use GOT+LO if callee has internal linkage.
+      if (CalleeLo.getNode()) {
+        SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+        Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+      } else
+        Callee = LoadValue;
+
+      // Use chain output from LoadValue 
+      Chain = LoadValue.getValue(1);
+    }
+
+    // copy to T9
+    Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+    InFlag = Chain.getValue(1);
+    Callee = DAG.getRegister(Mips::T9, MVT::i32);
+  }
 
   // MipsJmpLink = #chain, #target_address, #opt_in_flags...
   //             = Chain, Callee, Reg#1, Reg#2, ...
@@ -1143,7 +1282,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Create a stack location to hold GP when PIC is used. This stack
   // location is used on function prologue to save GP and also after all
-  // emited CALL's to restore GP.
+  // emitted CALL's to restore GP.
   if (IsPIC) {
       // Function can have an arbitrary number of calls, so
       // hold the LastArgStackLoc with the biggest offset.
@@ -1218,18 +1357,18 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 /// and generate load operations for arguments places on the stack.
 SDValue
 MipsTargetLowering::LowerFormalArguments(SDValue Chain,
-                                        CallingConv::ID CallConv, bool isVarArg,
-                                        const SmallVectorImpl<ISD::InputArg>
-                                        &Ins,
-                                        DebugLoc dl, SelectionDAG &DAG,
-                                        SmallVectorImpl<SDValue> &InVals)
+                                         CallingConv::ID CallConv,
+                                         bool isVarArg,
+                                         const SmallVectorImpl<ISD::InputArg>
+                                         &Ins,
+                                         DebugLoc dl, SelectionDAG &DAG,
+                                         SmallVectorImpl<SDValue> &InVals)
                                           const {
 
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
 
-  unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
   MipsFI->setVarArgsFrameIndex(0);
 
   // Used with vargs to acumulate store chains.
@@ -1249,9 +1388,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
   else
     CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
 
-  SDValue StackPtr;
-
   unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+  unsigned LastStackArgEndOffset = 0;
+  EVT LastRegArgValVT;
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -1260,6 +1399,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
     if (VA.isRegLoc()) {
       EVT RegVT = VA.getLocVT();
       ArgRegEnd = VA.getLocReg();
+      LastRegArgValVT = VA.getValVT();
       TargetRegisterClass *RC = 0;
 
       if (RegVT == MVT::i32)
@@ -1300,8 +1440,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
           unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
                                     VA.getLocReg()+1, RC);
           SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
-          SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue);
-          ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair);
+          if (!Subtarget->isLittle())
+            std::swap(ArgValue, ArgValue2);
+          ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+                                 ArgValue, ArgValue2);
         }
       }
 
@@ -1321,10 +1463,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       // used instead of a direct negative address (which is recorded to
       // be used on emitPrologue) to avoid mis-calc of the first stack
       // offset on PEI::calculateFrameObjectOffsets.
-      // Arguments are always 32-bit.
-      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+      unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+      LastStackArgEndOffset = FirstStackArgLoc + VA.getLocMemOffset() + ArgSize;
       int FI = MFI->CreateFixedObject(ArgSize, 0, true);
-      MipsFI->recordLoadArgsFI(FI, -(ArgSize+
+      MipsFI->recordLoadArgsFI(FI, -(4 +
         (FirstStackArgLoc + VA.getLocMemOffset())));
 
       // Create load nodes to retrieve arguments from the stack
@@ -1351,29 +1493,52 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
   // To meet ABI, when VARARGS are passed on registers, the registers
   // must have their values written to the caller stack frame. If the last
   // argument was placed in the stack, there's no need to save any register.
-  if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) {
-    if (StackPtr.getNode() == 0)
-      StackPtr = DAG.getRegister(StackReg, getPointerTy());
-
-    // The last register argument that must be saved is Mips::A3
-    TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
-    unsigned StackLoc = ArgLocs.size()-1;
-
-    for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) {
-      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
-
-      int FI = MFI->CreateFixedObject(4, 0, true);
-      MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
-      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
-                                       MachinePointerInfo(),
-                                       false, false, 0));
-
-      // Record the frame index of the first variable argument
-      // which is a value necessary to VASTART.
-      if (!MipsFI->getVarArgsFrameIndex())
+  if (isVarArg && Subtarget->isABI_O32()) {
+    if (ArgRegEnd) {
+      // Last named formal argument is passed in register.
+
+      // The last register argument that must be saved is Mips::A3
+      TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
+      if (LastRegArgValVT == MVT::f64)
+        ArgRegEnd++;
+
+      if (ArgRegEnd < Mips::A3) {
+        // Both the last named formal argument and the first variable
+        // argument are passed in registers.
+        for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd) {
+          unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
+          SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
+
+          int FI = MFI->CreateFixedObject(4, 0, true);
+          MipsFI->recordStoreVarArgsFI(FI, -(4+(ArgRegEnd-Mips::A0)*4));
+          SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+          OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+                                           MachinePointerInfo(),
+                                           false, false, 0));
+
+          // Record the frame index of the first variable argument
+          // which is a value necessary to VASTART.
+          if (!MipsFI->getVarArgsFrameIndex()) {
+            MFI->setObjectAlignment(FI, 4);
+            MipsFI->setVarArgsFrameIndex(FI);
+          }
+        }
+      } else {
+        // Last named formal argument is in register Mips::A3, and the first
+        // variable argument is on stack. Record the frame index of the first
+        // variable argument.
+        int FI = MFI->CreateFixedObject(4, 0, true);
+        MFI->setObjectAlignment(FI, 4);
+        MipsFI->recordStoreVarArgsFI(FI, -20);
         MipsFI->setVarArgsFrameIndex(FI);
+      }
+    } else {
+      // Last named formal argument and all the variable arguments are passed
+      // on stack. Record the frame index of the first variable argument.
+      int FI = MFI->CreateFixedObject(4, 0, true);
+      MFI->setObjectAlignment(FI, 4);
+      MipsFI->recordStoreVarArgsFI(FI, -(4+LastStackArgEndOffset));
+      MipsFI->setVarArgsFrameIndex(FI);
     }
   }
 
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9d6b9f3..e4d0c3d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -31,45 +31,50 @@ namespace llvm {
 
       // Get the Higher 16 bits from a 32-bit immediate
       // No relation with Mips Hi register
-      Hi, 
+      Hi,
 
       // Get the Lower 16 bits from a 32-bit immediate
       // No relation with Mips Lo register
-      Lo, 
+      Lo,
 
       // Handle gp_rel (small data/bss sections) relocation.
       GPRel,
 
-      // Select CC Pseudo Instruction
-      SelectCC,
-
-      // Floating Point Select CC Pseudo Instruction
-      FPSelectCC,
-
       // Floating Point Branch Conditional
       FPBrcond,
 
       // Floating Point Compare
       FPCmp,
 
+      // Floating Point Conditional Moves
+      CMovFP_T,
+      CMovFP_F,
+
       // Floating Point Rounding
       FPRound,
 
-      // Return 
+      // Return
       Ret,
 
       // MAdd/Sub nodes
       MAdd,
       MAddu,
       MSub,
-      MSubu
+      MSubu,
+
+      // DivRem(u)
+      DivRem,
+      DivRemU,
+
+      BuildPairF64,
+      ExtractElementF64
     };
   }
 
   //===--------------------------------------------------------------------===//
   // TargetLowering Implementation
   //===--------------------------------------------------------------------===//
-  
+
   class MipsTargetLowering : public TargetLowering  {
   public:
     explicit MipsTargetLowering(MipsTargetMachine &TM);
@@ -77,7 +82,7 @@ namespace llvm {
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
-    /// getTargetNodeName - This method returns the name of a target specific 
+    /// getTargetNodeName - This method returns the name of a target specific
     //  DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
 
@@ -87,7 +92,7 @@ namespace llvm {
     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;
 
-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   private:
     // Subtarget Info
     const MipsSubtarget *Subtarget;
@@ -101,16 +106,15 @@ namespace llvm {
                             SmallVectorImpl<SDValue> &InVals) const;
 
     // Lower Operand specifics
-    SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 
     virtual SDValue
@@ -149,7 +153,7 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::pair<unsigned, const TargetRegisterClass*> 
+    std::pair<unsigned, const TargetRegisterClass*>
               getRegForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
 
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 977e0df..a86c5c7 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -24,19 +24,28 @@
 //===----------------------------------------------------------------------===//
 
 // Floating Point Compare and Branch
-def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
-                                     SDTCisVT<1, OtherVT>]>;
-def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
-                                         SDTCisSameAs<1, 2>, SDTCisFP<1>, 
-                                         SDTCisInt<3>]>;
-def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
-                                  SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
-
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+                                            SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+                                         SDTCisInt<2>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+                                          SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+                                                SDTCisVT<1, i32>,
+                                                SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+                                                     SDTCisVT<1, f64>,
+                                                     SDTCisVT<0, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
 def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
-def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, 
-                          [SDNPHasChain]>; 
-def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
-def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+                          [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+                                   SDT_MipsExtractElementF64>;
 
 // Operand for printing out a condition code.
 let PrintMethod = "printFCCOperand" in
@@ -54,7 +63,7 @@ def IsNotMipsI       : Predicate<"!Subtarget.isMips1()">;
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
 //
-// A set of multiclasses is used to address the register usage. 
+// A set of multiclasses is used to address the register usage.
 //
 // S32 - single precision in 16 32bit even fp registers
 //       single precision in 32 32bit fp registers in SingleOnly mode
@@ -65,7 +74,7 @@ def IsNotMipsI       : Predicate<"!Subtarget.isMips1()">;
 // Only S32 and D32 are supported right now.
 //===----------------------------------------------------------------------===//
 
-multiclass FFR1_1<bits<6> funct, string asmstr> 
+multiclass FFR1_1<bits<6> funct, string asmstr>
 {
   def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
       !strconcat(asmstr, ".s $fd, $fs"), []>;
@@ -74,31 +83,31 @@ multiclass FFR1_1<bits<6> funct, string asmstr>
       !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>;
 }
 
-multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp> 
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
 {
   def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
-                 !strconcat(asmstr, ".s $fd, $fs"), 
+                 !strconcat(asmstr, ".s $fd, $fs"),
                  [(set FGR32:$fd, (FOp FGR32:$fs))]>;
 
   def _D32  : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
-                 !strconcat(asmstr, ".d $fd, $fs"), 
+                 !strconcat(asmstr, ".d $fd, $fs"),
                  [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
 }
 
-class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc, 
-              RegisterClass RcDst, string asmstr>: 
-  FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), 
-      !strconcat(asmstr, " $fd, $fs"), []>; 
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+              RegisterClass RcDst, string asmstr>:
+  FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+      !strconcat(asmstr, " $fd, $fs"), []>;
 
 
 multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp> {
-  def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), 
-                 (ins FGR32:$fs, FGR32:$ft), 
+  def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+                 (ins FGR32:$fs, FGR32:$ft),
                  !strconcat(asmstr, ".s $fd, $fs, $ft"),
                  [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
 
-  def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), 
-                 (ins AFGR64:$fs, AFGR64:$ft), 
+  def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+                 (ins AFGR64:$fs, AFGR64:$ft),
                  !strconcat(asmstr, ".d $fd, $fs, $ft"),
                  [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
                  Requires<[In32BitMode]>;
@@ -115,8 +124,8 @@ let ft = 0 in {
   defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
   defm CVTW    : FFR1_1<0b100100, "cvt.w">;
 
-  defm FABS    : FFR1_2<0b000101, "abs",  fabs>; 
-  defm FNEG    : FFR1_2<0b000111, "neg",  fneg>; 
+  defm FABS    : FFR1_2<0b000101, "abs",  fabs>;
+  defm FNEG    : FFR1_2<0b000111, "neg",  fneg>;
   defm FSQRT   : FFR1_2<0b000100, "sqrt", fsqrt>;
 
   /// Convert to Single Precison
@@ -140,23 +149,23 @@ let ft = 0 in {
     def TRUNC_LD  : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
 
     /// Convert to long signed integer
-    def CVTL_S    : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; 
-    def CVTL_D    : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; 
-
-    /// Convert to Double Precison 
-    def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; 
-    def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; 
-    def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; 
-                   
+    def CVTL_S    : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+    def CVTL_D    : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+    /// Convert to Double Precison
+    def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+    def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+    def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
     /// Convert to Single Precison
     def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
-    def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; 
+    def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
   }
 }
 
 // The odd-numbered registers are only referenced when doing loads,
 // stores, and moves between floating-point and integer registers.
-// When defining instructions, we reference all 32-bit registers, 
+// When defining instructions, we reference all 32-bit registers,
 // regardless of register aliasing.
 let fd = 0 in {
   /// Move Control Registers From/To CPU Registers
@@ -165,7 +174,7 @@ let fd = 0 in {
 
   def CTC1  : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
                   "ctc1 $fs, $rt", []>;
-                  
+
   def MFC1  : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
                   "mfc1 $rt, $fs", []>;
 
@@ -180,18 +189,18 @@ def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
 
 /// Floating Point Memory Instructions
 let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
-  def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), 
+  def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
                  "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
 
-  def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), 
+  def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
                  "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
 }
 
-// LWC1 and SWC1 can always be emited with odd registers.
+// LWC1 and SWC1 can always be emitted with odd registers.
 def LWC1  : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr",
-               [(set FGR32:$ft, (load addr:$addr))]>; 
+               [(set FGR32:$ft, (load addr:$addr))]>;
 def SWC1  : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr",
-               [(store FGR32:$ft, addr:$addr)]>; 
+               [(store FGR32:$ft, addr:$addr)]>;
 
 /// Floating-point Aritmetic
 defm FADD : FFR1_4<0x10, "add", fadd>;
@@ -202,7 +211,7 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>;
 //===----------------------------------------------------------------------===//
 // Floating Point Branch Codes
 //===----------------------------------------------------------------------===//
-// Mips branch codes. These correspond to condcode in MipsInstrInfo.h. 
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
 // They must be kept in synch.
 def MIPS_BRANCH_F  : PatLeaf<(i32 0)>;
 def MIPS_BRANCH_T  : PatLeaf<(i32 1)>;
@@ -210,11 +219,11 @@ def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
 def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
 
 /// Floating Point Branch of False/True (Likely)
-let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in {
-  class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs), 
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
+  class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
         (ins brtarget:$dst), !strconcat(asmstr, " $dst"),
-        [(MipsFPBrcond op, bb:$dst, FCR31)]>;
-}
+        [(MipsFPBrcond op, bb:$dst)]>;
+
 def BC1F  : FBRANCH<MIPS_BRANCH_F,  "bc1f">;
 def BC1T  : FBRANCH<MIPS_BRANCH_T,  "bc1t">;
 def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
@@ -223,11 +232,11 @@ def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
 //===----------------------------------------------------------------------===//
 // Floating Point Flag Conditions
 //===----------------------------------------------------------------------===//
-// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h. 
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
 // They must be kept in synch.
 def MIPS_FCOND_F    : PatLeaf<(i32 0)>;
 def MIPS_FCOND_UN   : PatLeaf<(i32 1)>;
-def MIPS_FCOND_EQ   : PatLeaf<(i32 2)>;
+def MIPS_FCOND_OEQ  : PatLeaf<(i32 2)>;
 def MIPS_FCOND_UEQ  : PatLeaf<(i32 3)>;
 def MIPS_FCOND_OLT  : PatLeaf<(i32 4)>;
 def MIPS_FCOND_ULT  : PatLeaf<(i32 5)>;
@@ -245,44 +254,90 @@ def MIPS_FCOND_NGT  : PatLeaf<(i32 15)>;
 /// Floating Point Compare
 let hasDelaySlot = 1, Defs=[FCR31] in {
   def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
-      "c.$cc.s $fs, $ft",
-        [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>; 
-  
+                     "c.$cc.s $fs, $ft",
+                     [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
+
   def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
-      "c.$cc.d $fs, $ft",
-       [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>,
-      Requires<[In32BitMode]>;
+                     "c.$cc.d $fs, $ft",
+                     [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
+                     Requires<[In32BitMode]>;
 }
 
-//===----------------------------------------------------------------------===//
-// Floating Point Pseudo-Instructions
-//===----------------------------------------------------------------------===//
 
-// For some explanation, see Select_CC at MipsInstrInfo.td. We also embedd a 
-// condiciton code to enable easy handling by the Custom Inserter.
-let usesCustomInserter = 1, Uses=[FCR31] in {
-  class PseudoFPSelCC<RegisterClass RC, string asmstr> : 
-    MipsPseudo<(outs RC:$dst), 
-               (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr, 
-               [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F,
-                 imm:$cc))]>;
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:float
+let usesCustomInserter = 1, Constraints = "$F = $dst" in
+class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
+                   string instr_asm> :
+  FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
+      !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
+
+def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
+def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
+
+let Predicates = [In32BitMode] in {
+  def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
+  def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
 }
 
-// The values to be selected are fp but the condition test is with integers.
-def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_S32_f32">;
-def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_D32_f32">,
-                    Requires<[In32BitMode]>;
+defm : MovzPats<FGR32, MOVZ_S>;
+defm : MovnPats<FGR32, MOVN_S>;
+
+let Predicates = [In32BitMode] in {
+  defm : MovzPats<AFGR64, MOVZ_D>;
+  defm : MovnPats<AFGR64, MOVN_D>;
+}
 
-// The values to be selected are int but the condition test is done with fp.
-def Select_FCC     : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">;
+let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in {
+// flag:float, data:int
+class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
+  FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
+        !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+        [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
+
+// flag:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+                  string instr_asm> :
+  FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
+         !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+         [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
+}
 
-// The values to be selected and the condition test is done with fp.
-def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_S32_f32">;
-def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_D32_f32">, 
-                     Requires<[In32BitMode]>;
+def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
+def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
 
-def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), 
-                             "# MOVCCRToCCR", []>; 
+let Predicates = [In32BitMode] in {
+  def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+  def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+                             "# MOVCCRToCCR", []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+  MipsPseudo<(outs AFGR64:$dst),
+             (ins CPURegs:$lo, CPURegs:$hi), "",
+             [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// if n is 0, lower part of src is extracted.
+// if n is 1, higher part of src is extracted.
+def ExtractElementF64 :
+  MipsPseudo<(outs CPURegs:$dst),
+             (ins AFGR64:$src, i32imm:$n), "",
+             [(set CPURegs:$dst,
+               (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
 
 //===----------------------------------------------------------------------===//
 // Floating Point Patterns
@@ -306,7 +361,7 @@ def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
 def : Pat<(i32 (bitconvert FGR32:$src)),  (MFC1 FGR32:$src)>;
 def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
 
-let Predicates = [In32BitMode] in { 
+let Predicates = [In32BitMode] in {
   def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
   def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
 }
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 98ae2fa..9dfcdfb 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -22,8 +22,8 @@
 //===----------------------------------------------------------------------===//
 
 // Generic Mips Format
-class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, 
-               InstrItinClass itin>: Instruction 
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+               InstrItinClass itin>: Instruction
 {
   field bits<32> Inst;
 
@@ -32,8 +32,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
   bits<6> opcode;
 
   // Top 5 bits are the 'opcode' field
-  let Inst{31-26} = opcode;   
-  
+  let Inst{31-26} = opcode;
+
   dag OutOperandList = outs;
   dag InOperandList  = ins;
 
@@ -52,7 +52,7 @@ class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
 
 class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
          list<dag> pattern, InstrItinClass itin>:
-      MipsInst<outs, ins, asmstr, pattern, itin> 
+      MipsInst<outs, ins, asmstr, pattern, itin>
 {
   bits<5>  rd;
   bits<5>  rs;
@@ -64,7 +64,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
   let funct  = _funct;
 
   let Inst{25-21} = rs;
-  let Inst{20-16} = rt; 
+  let Inst{20-16} = rt;
   let Inst{15-11} = rd;
   let Inst{10-6}  = shamt;
   let Inst{5-0}   = funct;
@@ -75,7 +75,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
 //===----------------------------------------------------------------------===//
 
 class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> 
+         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
 {
   bits<5>  rt;
   bits<5>  rs;
@@ -84,7 +84,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
   let opcode = op;
 
   let Inst{25-21} = rs;
-  let Inst{20-16} = rt; 
+  let Inst{20-16} = rt;
   let Inst{15-0}  = imm16;
 }
 
@@ -93,12 +93,12 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
 //===----------------------------------------------------------------------===//
 
 class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> 
+         InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
 {
   bits<26> addr;
 
   let opcode = op;
-  
+
   let Inst{25-0} = addr;
 }
 
@@ -119,9 +119,9 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
 // Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
 //===----------------------------------------------------------------------===//
 
-class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, 
-          string asmstr, list<dag> pattern> : 
-          MipsInst<outs, ins, asmstr, pattern, NoItinerary> 
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+          string asmstr, list<dag> pattern> :
+          MipsInst<outs, ins, asmstr, pattern, NoItinerary>
 {
   bits<5>  fd;
   bits<5>  fs;
@@ -134,7 +134,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
   let fmt    = _fmt;
 
   let Inst{25-21} = fmt;
-  let Inst{20-16} = ft; 
+  let Inst{20-16} = ft;
   let Inst{15-11} = fs;
   let Inst{10-6}  = fd;
   let Inst{5-0}   = funct;
@@ -144,8 +144,8 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
 // Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
 //===----------------------------------------------------------------------===//
 
-class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: 
-          MipsInst<outs, ins, asmstr, pattern, NoItinerary> 
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+          MipsInst<outs, ins, asmstr, pattern, NoItinerary>
 {
   bits<5>  ft;
   bits<5>  base;
@@ -154,7 +154,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
   let opcode = op;
 
   let Inst{25-21} = base;
-  let Inst{20-16} = ft; 
+  let Inst{20-16} = ft;
   let Inst{15-0}  = imm16;
 }
 
@@ -162,8 +162,8 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
 // Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
 //===----------------------------------------------------------------------===//
 
-class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : 
-          MipsInst<outs, ins, asmstr, pattern, NoItinerary> 
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+          MipsInst<outs, ins, asmstr, pattern, NoItinerary>
 {
   bits<5>  fs;
   bits<5>  ft;
@@ -174,9 +174,54 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
   let fmt    = _fmt;
 
   let Inst{25-21} = fmt;
-  let Inst{20-16} = ft; 
+  let Inst{20-16} = ft;
   let Inst{15-11} = fs;
   let Inst{10-6}  = 0;
   let Inst{5-4}   = 0b11;
   let Inst{3-0}   = cc;
 }
+
+
+class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
+            list<dag> pattern> :
+  MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+  bits<5>  rd;
+  bits<5>  rs;
+  bits<3>  N;
+  bits<1>  tf;
+
+  let opcode = 0;
+  let tf = _tf;
+
+  let Inst{25-21} = rs;
+  let Inst{20-18} = N;
+  let Inst{17} = 0;
+  let Inst{16} = tf;
+  let Inst{15-11} = rd;
+  let Inst{10-6}  = 0;
+  let Inst{5-0}   = 1;
+}
+
+class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
+             list<dag> pattern> :
+  MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+  bits<5>  fd;
+  bits<5>  fs;
+  bits<3>  N;
+  bits<5>  fmt;
+  bits<1>  tf;
+
+  let opcode = 17;
+  let fmt = _fmt;
+  let tf = _tf;
+
+  let Inst{25-21} = fmt;
+  let Inst{20-18} = N;
+  let Inst{17} = 0;
+  let Inst{16} = tf;
+  let Inst{15-11} = fs;
+  let Inst{10-6}  = fd;
+  let Inst{5-0}   = 17;
+}
+\ No newline at end of file
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index aaf307b..be044fa 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -36,7 +36,7 @@ static bool isZeroImm(const MachineOperand &op) {
 /// not, return 0.  This predicate must return 0 if the instruction has
 /// any side effects other than loading from the stack slot.
 unsigned MipsInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const 
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
 {
   if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
       (MI->getOpcode() == Mips::LDC1)) {
@@ -57,7 +57,7 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
 /// not, return 0.  This predicate must return 0 if the instruction has
 /// any side effects other than storing to the stack slot.
 unsigned MipsInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const 
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
 {
   if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
       (MI->getOpcode() == Mips::SDC1)) {
@@ -74,7 +74,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
 /// insertNoop - If data hazard condition is found insert the target nop
 /// instruction.
 void MipsInstrInfo::
-insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const 
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
 {
   DebugLoc DL;
   BuildMI(MBB, MI, DL, get(Mips::NOP));
@@ -136,7 +136,7 @@ copyPhysReg(MachineBasicBlock &MBB,
       .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
-  
+
   if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
     BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
@@ -153,13 +153,13 @@ copyPhysReg(MachineBasicBlock &MBB,
 
 void MipsInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned SrcReg, bool isKill, int FI, 
+                    unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  if (RC == Mips::CPURegsRegisterClass) 
+  if (RC == Mips::CPURegsRegisterClass)
     BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
           .addImm(0).addFrameIndex(FI);
   else if (RC == Mips::FGR32RegisterClass)
@@ -171,7 +171,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
         .addReg(SrcReg, getKillRegState(isKill))
         .addImm(0).addFrameIndex(FI);
     } else {
-      const TargetRegisterInfo *TRI = 
+      const TargetRegisterInfo *TRI =
         MBB.getParent()->getTarget().getRegisterInfo();
       const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
       BuildMI(MBB, I, DL, get(Mips::SWC1))
@@ -189,12 +189,12 @@ void MipsInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
-                     const TargetRegisterInfo *TRI) const 
+                     const TargetRegisterInfo *TRI) const
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  if (RC == Mips::CPURegsRegisterClass) 
+  if (RC == Mips::CPURegsRegisterClass)
     BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
   else if (RC == Mips::FGR32RegisterClass)
     BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
@@ -202,7 +202,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
       BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
     } else {
-      const TargetRegisterInfo *TRI = 
+      const TargetRegisterInfo *TRI =
         MBB.getParent()->getTarget().getRegisterInfo();
       const unsigned *SubSet = TRI->getSubRegisters(DestReg);
       BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
@@ -218,281 +218,202 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 // Branch Analysis
 //===----------------------------------------------------------------------===//
 
-/// GetCondFromBranchOpc - Return the Mips CC that matches 
-/// the correspondent Branch instruction opcode.
-static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc) 
-{
-  switch (BrOpc) {
-  default: return Mips::COND_INVALID;
-  case Mips::BEQ  : return Mips::COND_E;
-  case Mips::BNE  : return Mips::COND_NE;
-  case Mips::BGTZ : return Mips::COND_GZ;
-  case Mips::BGEZ : return Mips::COND_GEZ;
-  case Mips::BLTZ : return Mips::COND_LZ;
-  case Mips::BLEZ : return Mips::COND_LEZ;
-
-  // We dont do fp branch analysis yet!  
-  case Mips::BC1T : 
-  case Mips::BC1F : return Mips::COND_INVALID;
-  }
+static unsigned GetAnalyzableBrOpc(unsigned Opc) {
+  return (Opc == Mips::BEQ  || Opc == Mips::BNE  || Opc == Mips::BGTZ ||
+          Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+          Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
 }
 
-/// GetCondBranchFromCond - Return the Branch instruction
-/// opcode that matches the cc.
-unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) 
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
 {
-  switch (CC) {
-  default: llvm_unreachable("Illegal condition code!");
-  case Mips::COND_E   : return Mips::BEQ;
-  case Mips::COND_NE  : return Mips::BNE;
-  case Mips::COND_GZ  : return Mips::BGTZ;
-  case Mips::COND_GEZ : return Mips::BGEZ;
-  case Mips::COND_LZ  : return Mips::BLTZ;
-  case Mips::COND_LEZ : return Mips::BLEZ;
-
-  case Mips::FCOND_F:
-  case Mips::FCOND_UN:
-  case Mips::FCOND_EQ:
-  case Mips::FCOND_UEQ:
-  case Mips::FCOND_OLT:
-  case Mips::FCOND_ULT:
-  case Mips::FCOND_OLE:
-  case Mips::FCOND_ULE:
-  case Mips::FCOND_SF:
-  case Mips::FCOND_NGLE:
-  case Mips::FCOND_SEQ:
-  case Mips::FCOND_NGL:
-  case Mips::FCOND_LT:
-  case Mips::FCOND_NGE:
-  case Mips::FCOND_LE:
-  case Mips::FCOND_NGT: return Mips::BC1T;
-
-  case Mips::FCOND_T:
-  case Mips::FCOND_OR:
-  case Mips::FCOND_NEQ:
-  case Mips::FCOND_OGL:
-  case Mips::FCOND_UGE:
-  case Mips::FCOND_OGE:
-  case Mips::FCOND_UGT:
-  case Mips::FCOND_OGT:
-  case Mips::FCOND_ST:
-  case Mips::FCOND_GLE:
-  case Mips::FCOND_SNE:
-  case Mips::FCOND_GL:
-  case Mips::FCOND_NLT:
-  case Mips::FCOND_GE:
-  case Mips::FCOND_NLE:
-  case Mips::FCOND_GT: return Mips::BC1F;
+  switch (Opc) {
+  default: llvm_unreachable("Illegal opcode!");
+  case Mips::BEQ  : return Mips::BNE;
+  case Mips::BNE  : return Mips::BEQ;
+  case Mips::BGTZ : return Mips::BLEZ;
+  case Mips::BGEZ : return Mips::BLTZ;
+  case Mips::BLTZ : return Mips::BGEZ;
+  case Mips::BLEZ : return Mips::BGTZ;
+  case Mips::BC1T : return Mips::BC1F;
+  case Mips::BC1F : return Mips::BC1T;
   }
 }
 
-/// GetOppositeBranchCondition - Return the inverse of the specified 
-/// condition, e.g. turning COND_E to COND_NE.
-Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC) 
-{
-  switch (CC) {
-  default: llvm_unreachable("Illegal condition code!");
-  case Mips::COND_E   : return Mips::COND_NE;
-  case Mips::COND_NE  : return Mips::COND_E;
-  case Mips::COND_GZ  : return Mips::COND_LEZ;
-  case Mips::COND_GEZ : return Mips::COND_LZ;
-  case Mips::COND_LZ  : return Mips::COND_GEZ;
-  case Mips::COND_LEZ : return Mips::COND_GZ;
-  case Mips::FCOND_F  : return Mips::FCOND_T;
-  case Mips::FCOND_UN : return Mips::FCOND_OR;
-  case Mips::FCOND_EQ : return Mips::FCOND_NEQ;
-  case Mips::FCOND_UEQ: return Mips::FCOND_OGL;
-  case Mips::FCOND_OLT: return Mips::FCOND_UGE;
-  case Mips::FCOND_ULT: return Mips::FCOND_OGE;
-  case Mips::FCOND_OLE: return Mips::FCOND_UGT;
-  case Mips::FCOND_ULE: return Mips::FCOND_OGT;
-  case Mips::FCOND_SF:  return Mips::FCOND_ST;
-  case Mips::FCOND_NGLE:return Mips::FCOND_GLE;
-  case Mips::FCOND_SEQ: return Mips::FCOND_SNE;
-  case Mips::FCOND_NGL: return Mips::FCOND_GL;
-  case Mips::FCOND_LT:  return Mips::FCOND_NLT;
-  case Mips::FCOND_NGE: return Mips::FCOND_GE;
-  case Mips::FCOND_LE:  return Mips::FCOND_NLE;
-  case Mips::FCOND_NGT: return Mips::FCOND_GT;
-  }
+static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+                          MachineBasicBlock *&BB,
+                          SmallVectorImpl<MachineOperand>& Cond) {
+  assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+  int NumOp = Inst->getNumExplicitOperands();
+  
+  // for both int and fp branches, the last explicit operand is the
+  // MBB.
+  BB = Inst->getOperand(NumOp-1).getMBB();
+  Cond.push_back(MachineOperand::CreateImm(Opc));
+
+  for (int i=0; i<NumOp-1; i++)
+    Cond.push_back(Inst->getOperand(i));
 }
 
-bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
-                                  bool AllowModify) const 
+                                  bool AllowModify) const
 {
-  // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+  // Skip all the debug instructions.
+  while (I != REnd && I->isDebugValue())
+    ++I;
+
+  if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+    // If this block ends with no branches (it just falls through to its succ)
+    // just return false, leaving TBB/FBB null.
+    TBB = FBB = NULL;
     return false;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return false;
-    --I;
   }
-  if (!isUnpredicatedTerminator(I))
-    return false;
-  
-  // Get the last instruction in the block.
-  MachineInstr *LastInst = I;
-  
-  // If there is only one terminator instruction, process it.
+
+  MachineInstr *LastInst = &*I;
   unsigned LastOpc = LastInst->getOpcode();
-  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
-    if (!LastInst->getDesc().isBranch())
+
+  // Not an analyzable branch (must be an indirect jump).
+  if (!GetAnalyzableBrOpc(LastOpc))
+    return true;
+
+  // Get the second to last instruction in the block.
+  unsigned SecondLastOpc = 0;
+  MachineInstr *SecondLastInst = NULL;
+
+  if (++I != REnd) {
+    SecondLastInst = &*I;
+    SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+    // Not an analyzable branch (must be an indirect jump).
+    if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
       return true;
+  }
 
+  // If there is only one terminator instruction, process it.
+  if (!SecondLastOpc) {
     // Unconditional branch
     if (LastOpc == Mips::J) {
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     }
 
-    Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
-    if (BranchCode == Mips::COND_INVALID)
-      return true;  // Can't handle indirect branch.
-
     // Conditional branch
-    // Block ends with fall-through condbranch.
-    if (LastOpc != Mips::COND_INVALID) {
-      int LastNumOp = LastInst->getNumOperands();
-
-      TBB = LastInst->getOperand(LastNumOp-1).getMBB();
-      Cond.push_back(MachineOperand::CreateImm(BranchCode));
-
-      for (int i=0; i<LastNumOp-1; i++) {
-        Cond.push_back(LastInst->getOperand(i));
-      }
-
-      return false;
-    }
+    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+    return false;
   }
-  
-  // Get the instruction before it if it is a terminator.
-  MachineInstr *SecondLastInst = I;
-  
+
+  // If we reached here, there are two branches.
   // If there are three terminators, we don't know what sort of block this is.
-  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+  if (++I != REnd && isUnpredicatedTerminator(&*I))
     return true;
 
-  // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
-  unsigned SecondLastOpc    = SecondLastInst->getOpcode();
-  Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+  // If second to last instruction is an unconditional branch,
+  // analyze it and remove the last instruction.
+  if (SecondLastOpc == Mips::J) {
+    // Return if the last instruction cannot be removed.
+    if (!AllowModify)
+      return true;
 
-  if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
-    int SecondNumOp = SecondLastInst->getNumOperands();
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    LastInst->eraseFromParent();
+    return false;
+  }
 
-    TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
-    Cond.push_back(MachineOperand::CreateImm(BranchCode));
+  // Conditional branch followed by an unconditional branch.
+  // The last one must be unconditional.
+  if (LastOpc != Mips::J)
+    return true;
 
-    for (int i=0; i<SecondNumOp-1; i++) {
-      Cond.push_back(SecondLastInst->getOperand(i));
-    }
+  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+  FBB = LastInst->getOperand(0).getMBB();
 
-    FBB = LastInst->getOperand(0).getMBB();
-    return false;
-  }
+  return false;
+} 
   
-  // If the block ends with two unconditional branches, handle it. The last 
-  // one is not executed, so remove it.
-  if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
-    TBB = SecondLastInst->getOperand(0).getMBB();
-    I = LastInst;
-    if (AllowModify)
-      I->eraseFromParent();
-    return false;
-  }
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+                                MachineBasicBlock *TBB, DebugLoc DL,
+                                const SmallVectorImpl<MachineOperand>& Cond)
+  const {
+  unsigned Opc = Cond[0].getImm();
+  const TargetInstrDesc &TID = get(Opc);
+  MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID);
 
-  // Otherwise, can't handle this.
-  return true;
+  for (unsigned i = 1; i < Cond.size(); ++i)
+    MIB.addReg(Cond[i].getReg());
+
+  MIB.addMBB(TBB);
 }
 
 unsigned MipsInstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
              MachineBasicBlock *FBB,
              const SmallVectorImpl<MachineOperand> &Cond,
              DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
-         "Mips branch conditions can have two|three components!");
 
-  if (FBB == 0) { // One way branch.
-    if (Cond.empty()) {
-      // Unconditional branch?
-      BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
-    } else {
-      // Conditional branch.
-      unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
-      const TargetInstrDesc &TID = get(Opc);
-
-      if (TID.getNumOperands() == 3)
-        BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
-                          .addReg(Cond[2].getReg())
-                          .addMBB(TBB);
-      else
-        BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
-                          .addMBB(TBB);
-
-    }                             
-    return 1;
-  }
-  
-  // Two-way Conditional branch.
-  unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
-  const TargetInstrDesc &TID = get(Opc);
+  // # of condition operands:
+  //  Unconditional branches: 0
+  //  Floating point branches: 1 (opc)
+  //  Int BranchZero: 2 (opc, reg)
+  //  Int Branch: 3 (opc, reg0, reg1)
+  assert((Cond.size() <= 3) &&
+         "# of Mips branch conditions must be <= 3!");
 
-  if (TID.getNumOperands() == 3)
-    BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
-                      .addMBB(TBB);
-  else
-    BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+  // Two-way Conditional branch.
+  if (FBB) {
+    BuildCondBr(MBB, TBB, DL, Cond);
+    BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+    return 2;
+  }
 
-  BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
-  return 2;
+  // One way branch.
+  // Unconditional branch.
+  if (Cond.empty())
+    BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+  else // Conditional branch.
+    BuildCondBr(MBB, TBB, DL, Cond);
+  return 1;
 }
 
 unsigned MipsInstrInfo::
-RemoveBranch(MachineBasicBlock &MBB) const 
+RemoveBranch(MachineBasicBlock &MBB) const
 {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
-  if (I->getOpcode() != Mips::J && 
-      GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
-    return 0;
-  
-  // Remove the branch.
-  I->eraseFromParent();
-  
-  I = MBB.end();
-  
-  if (I == MBB.begin()) return 1;
-  --I;
-  if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
-    return 1;
-  
-  // Remove the branch.
-  I->eraseFromParent();
-  return 2;
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+  MachineBasicBlock::reverse_iterator FirstBr;
+  unsigned removed;
+
+  // Skip all the debug instructions.
+  while (I != REnd && I->isDebugValue())
+    ++I;
+
+  FirstBr = I;
+
+  // Up to 2 branches are removed.
+  // Note that indirect branches are not removed.
+  for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+    if (!GetAnalyzableBrOpc(I->getOpcode()))
+      break;
+
+  MBB.erase(I.base(), FirstBr.base());
+
+  return removed;
 }
 
-/// ReverseBranchCondition - Return the inverse opcode of the 
+/// ReverseBranchCondition - Return the inverse opcode of the
 /// specified Branch instruction.
 bool MipsInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const 
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
 {
-  assert( (Cond.size() == 3 || Cond.size() == 2) && 
+  assert( (Cond.size() && Cond.size() <= 3) &&
           "Invalid Mips branch condition!");
-  Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm()));
+  Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
   return false;
 }
 
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 52a3d39..5fdbf1f 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -37,7 +37,7 @@ namespace Mips {
     // To be used with float branch True
     FCOND_F,
     FCOND_UN,
-    FCOND_EQ,
+    FCOND_OEQ,
     FCOND_UEQ,
     FCOND_OLT,
     FCOND_ULT,
@@ -57,8 +57,8 @@ namespace Mips {
     // above ones, but are used with a branch False;
     FCOND_T,
     FCOND_OR,
-    FCOND_NEQ,
-    FCOND_OGL,
+    FCOND_UNE,
+    FCOND_ONE,
     FCOND_UGE,
     FCOND_OGE,
     FCOND_UGT,
@@ -70,27 +70,15 @@ namespace Mips {
     FCOND_NLT,
     FCOND_GE,
     FCOND_NLE,
-    FCOND_GT,
-
-    // Only integer conditions
-    COND_E,
-    COND_GZ,
-    COND_GEZ,
-    COND_LZ,
-    COND_LEZ,
-    COND_NE,
-    COND_INVALID
+    FCOND_GT
   };
-  
-  // Turn condition code into conditional branch opcode.
-  unsigned GetCondBranchFromCond(CondCode CC);
 
-  /// GetOppositeBranchCondition - Return the inverse of the specified cond,
-  /// e.g. turning COND_E to COND_NE.
-  CondCode GetOppositeBranchCondition(Mips::CondCode CC);
+  /// GetOppositeBranchOpc - Return the inverse of the specified
+  /// opcode, e.g. turning BEQ to BNE.
+  unsigned GetOppositeBranchOpc(unsigned Opc);
 
   /// MipsCCToString - Map each FP condition code to its string
-  inline static const char *MipsFCCToString(Mips::CondCode CC) 
+  inline static const char *MipsFCCToString(Mips::CondCode CC)
   {
     switch (CC) {
       default: llvm_unreachable("Unknown condition code");
@@ -98,10 +86,10 @@ namespace Mips {
       case FCOND_T:   return "f";
       case FCOND_UN:
       case FCOND_OR:  return "un";
-      case FCOND_EQ: 
-      case FCOND_NEQ: return "eq";
+      case FCOND_OEQ:
+      case FCOND_UNE: return "eq";
       case FCOND_UEQ:
-      case FCOND_OGL: return "ueq";
+      case FCOND_ONE: return "ueq";
       case FCOND_OLT:
       case FCOND_UGE: return "olt";
       case FCOND_ULT:
@@ -121,11 +109,11 @@ namespace Mips {
       case FCOND_LT:
       case FCOND_NLT: return "lt";
       case FCOND_NGE:
-      case FCOND_GE:  return "ge";
+      case FCOND_GE:  return "nge";
       case FCOND_LE:
-      case FCOND_NLE: return "nle";
+      case FCOND_NLE: return "le";
       case FCOND_NGT:
-      case FCOND_GT:  return "gt";
+      case FCOND_GT:  return "ngt";
     }
   }
 }
@@ -138,27 +126,27 @@ namespace MipsII {
   enum TOF {
     //===------------------------------------------------------------------===//
     // Mips Specific MachineOperand flags.
- 
+
     MO_NO_FLAG,
 
     /// MO_GOT - Represents the offset into the global offset table at which
     /// the address the relocation entry symbol resides during execution.
     MO_GOT,
 
-    /// MO_GOT_CALL - Represents the offset into the global offset table at 
-    /// which the address of a call site relocation entry symbol resides 
+    /// MO_GOT_CALL - Represents the offset into the global offset table at
+    /// which the address of a call site relocation entry symbol resides
     /// during execution. This is different from the above since this flag
     /// can only be present in call instructions.
     MO_GOT_CALL,
 
-    /// MO_GPREL - Represents the offset from the current gp value to be used 
+    /// MO_GPREL - Represents the offset from the current gp value to be used
     /// for the relocatable object file being produced.
     MO_GPREL,
 
-    /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
-    /// address. 
-    MO_ABS_HILO
-
+    /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+    /// address.
+    MO_ABS_HI,
+    MO_ABS_LO
   };
 }
 
@@ -181,7 +169,7 @@ public:
   /// any side effects other than loading from the stack slot.
   virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                        int &FrameIndex) const;
-  
+
   /// isStoreToStackSlot - If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
   /// the source reg along with the FrameIndex of the loaded stack slot.  If
@@ -189,13 +177,19 @@ public:
   /// any side effects other than storing to the stack slot.
   virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const;
- 
+
   /// Branch Analysis
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                              MachineBasicBlock *&FBB,
                              SmallVectorImpl<MachineOperand> &Cond,
                              bool AllowModify) const;
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+private:
+  void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+                   const SmallVectorImpl<MachineOperand>& Cond) const;
+
+public:
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
@@ -220,7 +214,7 @@ public:
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
   /// Insert nop instruction when hazard condition is found
-  virtual void insertNoop(MachineBasicBlock &MBB, 
+  virtual void insertNoop(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI) const;
 
   /// getGlobalBaseReg - Return a virtual register initialized with the
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b70266a..19b9c35 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -19,18 +19,19 @@ include "MipsInstrFormats.td"
 
 def SDT_MipsRet          : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def SDT_MipsJmpLink      : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
-def SDT_MipsSelectCC     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
-                                         SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
 def SDT_MipsCMov         : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
-                                         SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
-                                         SDTCisInt<4>]>;
+                                                SDTCisSameAs<1, 2>,
+                                                SDTCisSameAs<3, 4>,
+                                                SDTCisInt<4>]>;
 def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
 def SDT_MipsCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub     : SDTypeProfile<0, 4, 
+def SDT_MipsMAddMSub     : SDTypeProfile<0, 4,
                                          [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
-                                          SDTCisSameAs<1, 2>, 
+                                          SDTCisSameAs<1, 2>,
                                           SDTCisSameAs<2, 3>]>;
-
+def SDT_MipsDivRem       : SDTypeProfile<0, 2,
+                                         [SDTCisVT<0, i32>,
+                                          SDTCisSameAs<0, 1>]>;
 
 // Call
 def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
@@ -54,9 +55,6 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
 def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
-// Select Condition Code
-def MipsSelectCC  : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
-
 // MAdd*/MSub* nodes
 def MipsMAdd      : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
                            [SDNPOptInGlue, SDNPOutGlue]>;
@@ -67,6 +65,12 @@ def MipsMSub      : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
 def MipsMSubu     : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
                            [SDNPOptInGlue, SDNPOutGlue]>;
 
+// DivRem(u) nodes
+def MipsDivRem    : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
+                           [SDNPOutGlue]>;
+def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+                           [SDNPOutGlue]>;
+
 //===----------------------------------------------------------------------===//
 // Mips Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
@@ -165,7 +169,7 @@ class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
 let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
 class MArithR<bits<6> func, string instr_asm, SDNode op> :
   FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
-     !strconcat(instr_asm, "\t$rs, $rt"), 
+     !strconcat(instr_asm, "\t$rs, $rt"),
      [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>;
 
 //  Logical
@@ -185,7 +189,7 @@ class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
      [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
 
 // Shifts
-class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, 
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
                               SDNode OpNode>:
   FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
      !strconcat(instr_asm, "\t$dst, $b, $c"),
@@ -193,7 +197,7 @@ class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
   let rs = _rs;
 }
 
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm, 
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
                               SDNode OpNode>:
   FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
      !strconcat(instr_asm, "\t$dst, $b, $c"),
@@ -283,9 +287,16 @@ let isCall=1, hasDelaySlot=1,
 }
 
 // Mul, Div
-class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
-  FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
-     !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+let Defs = [HI, LO] in {
+  class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+    FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+       !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+  class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+            FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+            !strconcat(instr_asm, "\t$$zero, $a, $b"),
+            [(op CPURegs:$a, CPURegs:$b)], itin>;
+}
 
 // Move from Hi/Lo
 class MoveFromLOHI<bits<6> func, string instr_asm>:
@@ -348,6 +359,11 @@ def REORDER   : MipsPseudo<(outs), (ins), ".set\treorder",   []>;
 def NOMACRO   : MipsPseudo<(outs), (ins), ".set\tnomacro",   []>;
 def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
 
+// These macros are inserted to prevent GAS from complaining
+// when using the AT register.
+def NOAT      : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
+def ATMACRO   : MipsPseudo<(outs), (ins), ".set\tat", []>;
+
 // When handling PIC code the assembler needs .cpload and .cprestore
 // directives. If the real instructions corresponding these directives
 // are used, we have the same behavior, but get also a bunch of warnings
@@ -355,18 +371,6 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
 def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
 def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
 
-// The supported Mips ISAs dont have any instruction close to the SELECT_CC
-// operation. The solution is to create a Mips pseudo SELECT_CC instruction
-// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
-// replace it for real supported nodes into EmitInstrWithCustomInserter
-let usesCustomInserter = 1 in {
-  class PseudoSelCC<RegisterClass RC, string asmstr>:
-    MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
-    [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
-}
-
-def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">;
-
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
@@ -447,12 +451,10 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
                 "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
 
 /// Multiply and Divide Instructions.
-let Defs = [HI, LO] in {
-  def MULT    : MulDiv<0x18, "mult", IIImul>;
-  def MULTu   : MulDiv<0x19, "multu", IIImul>;
-  def DIV     : MulDiv<0x1a, "div", IIIdiv>;
-  def DIVu    : MulDiv<0x1b, "divu", IIIdiv>;
-}
+def MULT    : Mul<0x18, "mult", IIImul>;
+def MULTu   : Mul<0x19, "multu", IIImul>;
+def SDIV    : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV    : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
 
 let Defs = [HI] in
   def MTHI  : MoveToLOHI<0x11, "mthi">;
@@ -489,10 +491,19 @@ let Predicates = [HasSwap] in {
 def MIPS_CMOV_ZERO  : PatLeaf<(i32 0)>;
 def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
 
-let Predicates = [HasCondMov], Constraints = "$F = $dst" in {
-  def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
-  def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
-}
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// flag:int, data:int
+let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
+  class CondMovIntInt<bits<6> funct, string instr_asm> :
+    FR<0, funct, (outs CPURegs:$dst),
+       (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
+       !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+
+def MOVZ_I : CondMovIntInt<0x0a, "movz">;
+def MOVN_I : CondMovIntInt<0x0b, "movn">;
 
 /// No operation
 let addr=0 in
@@ -533,7 +544,7 @@ def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
           (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
 def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
           (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc  CPURegs:$src, imm:$imm),
+def : Pat<(addc  CPURegs:$src, immSExt16:$imm),
           (ADDiu CPURegs:$src, imm:$imm)>;
 
 // Call
@@ -546,8 +557,11 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
 
 // hi/lo relocs
 def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
           (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+          (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
 
 def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
@@ -599,33 +613,43 @@ def : Pat<(brcond CPURegs:$cond, bb:$dst),
           (BNE CPURegs:$cond, ZERO, bb:$dst)>;
 
 // select patterns
-def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>;
-def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>;
-
-def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>;
-def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>;
-
-def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
-          (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-
-def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
-          (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
+  def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+  def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+  def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
+  def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
+  def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+  def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+  def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+  def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+            (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
+  def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+            (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+  def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
+            (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
+  def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+            (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+defm : MovzPats<CPURegs, MOVZ_I>;
+defm : MovnPats<CPURegs, MOVN_I>;
 
 // select patterns with got access
-def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
-                  (i32 tglobaladdr:$T), CPURegs:$F),
-          (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
-                (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+let AddedComplexity = 10 in
+  def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
+                    (i32 tglobaladdr:$T), CPURegs:$F),
+            (MOVN_I CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
+                    (XOR CPURegs:$lhs, CPURegs:$rhs))>;
 
 // setcc patterns
 def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h
index 15a867e..41b7192 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
   class Target;
-  
+
   class MipsMCAsmInfo : public MCAsmInfo {
   public:
     explicit MipsMCAsmInfo(const Target &T, StringRef TT);
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 3719e58..c09b129 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -38,7 +38,7 @@
 
 using namespace llvm;
 
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, 
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
                                    const TargetInstrInfo &tii)
   : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
     Subtarget(ST), TII(tii) {}
@@ -46,7 +46,7 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
 /// getRegisterNumbering - Given the enum value for some register, e.g.
 /// Mips::RA, return the number that it corresponds to (e.g. 31).
 unsigned MipsRegisterInfo::
-getRegisterNumbering(unsigned RegEnum) 
+getRegisterNumbering(unsigned RegEnum)
 {
   switch (RegEnum) {
     case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
@@ -82,30 +82,30 @@ getRegisterNumbering(unsigned RegEnum)
     case Mips::FP   : case Mips::F30: case Mips::D15: return 30;
     case Mips::RA   : case Mips::F31: return 31;
     default: llvm_unreachable("Unknown register number!");
-  }    
+  }
   return 0; // Not reached
 }
 
 unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
 
 //===----------------------------------------------------------------------===//
-// Callee Saved Registers methods 
+// Callee Saved Registers methods
 //===----------------------------------------------------------------------===//
 
 /// Mips Callee Saved Registers
 const unsigned* MipsRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const 
+getCalleeSavedRegs(const MachineFunction *MF) const
 {
   // Mips callee-save register range is $16-$23, $f20-$f30
   static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
-    Mips::S0, Mips::S1, Mips::S2, Mips::S3, 
+    Mips::S0, Mips::S1, Mips::S2, Mips::S3,
     Mips::S4, Mips::S5, Mips::S6, Mips::S7,
-    Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25, 
+    Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
     Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0
   };
 
   static const unsigned BitMode32CalleeSavedRegs[] = {
-    Mips::S0, Mips::S1, Mips::S2, Mips::S3, 
+    Mips::S0, Mips::S1, Mips::S2, Mips::S3,
     Mips::S4, Mips::S5, Mips::S6, Mips::S7,
     Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0
   };
@@ -132,11 +132,11 @@ getReservedRegs(const MachineFunction &MF) const {
   if (!Subtarget.isSingleFloat())
     for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
       Reserved.set(FReg);
-  
+
   return Reserved;
 }
 
-// This function eliminate ADJCALLSTACKDOWN, 
+// This function eliminate ADJCALLSTACKDOWN,
 // ADJCALLSTACKUP pseudo instructions
 void MipsRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -157,7 +157,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   unsigned i = 0;
   while (!MI.getOperand(i).isFI()) {
     ++i;
-    assert(i < MI.getNumOperands() && 
+    assert(i < MI.getNumOperands() &&
            "Instr doesn't have FrameIndex operand!");
   }
 
@@ -179,8 +179,43 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
 
   DEBUG(errs() << "Offset     : " << Offset << "\n" << "<--------->\n");
 
-  MI.getOperand(i-1).ChangeToImmediate(Offset);
-  MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+  unsigned NewReg = 0;
+  int NewImm = 0;
+  MachineBasicBlock &MBB = *MI.getParent();
+  bool ATUsed;
+  unsigned OrigReg = getFrameRegister(MF);
+  int OrigImm = Offset;
+
+// OrigImm fits in the 16-bit field
+  if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+    NewReg = OrigReg;
+    NewImm = OrigImm;
+    ATUsed = false;
+  }
+  else {
+    const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+    DebugLoc DL = II->getDebugLoc();
+    int ImmLo = OrigImm & 0xffff;
+    int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+                ((OrigImm & 0x8000) != 0);
+
+    // FIXME: change this when mips goes MC".
+    BuildMI(MBB, II, DL, TII->get(Mips::NOAT));
+    BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+    BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+                                                        .addReg(Mips::AT);
+    NewReg = Mips::AT;
+    NewImm = ImmLo;
+    
+    ATUsed = true;
+  }
+
+  // FIXME: change this when mips goes MC".
+  if (ATUsed)
+    BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+
+  MI.getOperand(i).ChangeToRegister(NewReg, false);
+  MI.getOperand(i-1).ChangeToImmediate(NewImm);
 }
 
 void MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index a7f4bf9..767359f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -26,7 +26,7 @@ class Type;
 struct MipsRegisterInfo : public MipsGenRegisterInfo {
   const MipsSubtarget &Subtarget;
   const TargetInstrInfo &TII;
-  
+
   MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
 
   /// getRegisterNumbering - Given the enum value for some register, e.g.
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 60efe31..9f9cae7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -17,7 +17,7 @@ class MipsReg<string n> : Register<n> {
   let Namespace = "Mips";
 }
 
-class MipsRegWithSubRegs<string n, list<Register> subregs> 
+class MipsRegWithSubRegs<string n, list<Register> subregs>
   : RegisterWithSubRegs<n, subregs> {
   field bits<5> Num;
   let Namespace = "Mips";
@@ -83,7 +83,7 @@ let Namespace = "Mips" in {
   def SP   : MipsGPRReg< 29, "SP">,  DwarfRegNum<[29]>;
   def FP   : MipsGPRReg< 30, "FP">,  DwarfRegNum<[30]>;
   def RA   : MipsGPRReg< 31, "RA">,  DwarfRegNum<[31]>;
-  
+
   /// Mips Single point precision FPU Registers
   def F0  : FPR< 0,  "F0">, DwarfRegNum<[32]>;
   def F1  : FPR< 1,  "F1">, DwarfRegNum<[33]>;
@@ -117,7 +117,7 @@ let Namespace = "Mips" in {
   def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
   def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
   def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
-  
+
   /// Mips Double point precision FPU Registers (aliased
   /// with the single precision to hold 64 bit values)
   def D0  : AFPR< 0,  "F0", [F0,   F1]>, DwarfRegNum<[32]>;
@@ -149,11 +149,11 @@ let Namespace = "Mips" in {
 // Register Classes
 //===----------------------------------------------------------------------===//
 
-def CPURegs : RegisterClass<"Mips", [i32], 32, 
+def CPURegs : RegisterClass<"Mips", [i32], 32,
   // Return Values and Arguments
   [V0, V1, A0, A1, A2, A3,
   // Not preserved across procedure calls
-  T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, 
+  T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
   // Callee save
   S0, S1, S2, S3, S4, S5, S6, S7,
   // Reserved
@@ -173,16 +173,16 @@ def CPURegs : RegisterClass<"Mips", [i32], 32,
 
 // 64bit fp:
 // * FGR64  - 32 64-bit registers
-// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) 
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
 //
 // 32bit fp:
 // * FGR32 - 16 32-bit even registers
 // * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32, 
+def FGR32 : RegisterClass<"Mips", [f32], 32,
   // Return Values and Arguments
   [F0, F1, F2, F3, F12, F13, F14, F15,
   // Not preserved across procedure calls
-  F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, 
+  F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
   // Callee save
   F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
   // Reserved
@@ -195,17 +195,17 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
   let MethodBodies = [{
 
     static const unsigned MIPS_FGR32[] = {
-      Mips::F0,  Mips::F1,  Mips::F2,  Mips::F3,  Mips::F12,  Mips::F13, 
-      Mips::F14, Mips::F15, Mips::F4,  Mips::F5,  Mips::F6,   Mips::F7, 
-      Mips::F8,  Mips::F9,  Mips::F10, Mips::F11, Mips::F16,  Mips::F17, 
-      Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22,  Mips::F23, 
-      Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28,  Mips::F29, 
+      Mips::F0,  Mips::F1,  Mips::F2,  Mips::F3,  Mips::F12,  Mips::F13,
+      Mips::F14, Mips::F15, Mips::F4,  Mips::F5,  Mips::F6,   Mips::F7,
+      Mips::F8,  Mips::F9,  Mips::F10, Mips::F11, Mips::F16,  Mips::F17,
+      Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22,  Mips::F23,
+      Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28,  Mips::F29,
       Mips::F30
     };
 
     static const unsigned MIPS_SVR4_FGR32[] = {
-      Mips::F0,  Mips::F2,  Mips::F12, Mips::F14, Mips::F4, 
-      Mips::F6,  Mips::F8,  Mips::F10, Mips::F16, Mips::F18, 
+      Mips::F0,  Mips::F2,  Mips::F12, Mips::F14, Mips::F4,
+      Mips::F6,  Mips::F8,  Mips::F10, Mips::F16, Mips::F18,
       Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
     };
 
@@ -217,7 +217,7 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
       if (Subtarget.isSingleFloat())
         return MIPS_FGR32;
       else
-        return MIPS_SVR4_FGR32; 
+        return MIPS_SVR4_FGR32;
     }
 
     FGR32Class::iterator
@@ -233,11 +233,11 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
   }];
 }
 
-def AFGR64 : RegisterClass<"Mips", [f64], 64, 
+def AFGR64 : RegisterClass<"Mips", [f64], 64,
   // Return Values and Arguments
   [D0, D1, D6, D7,
   // Not preserved across procedure calls
-  D2, D3, D4, D5, D8, D9, 
+  D2, D3, D4, D5, D8, D9,
   // Callee save
   D10, D11, D12, D13, D14,
   // Reserved
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 49ca5d1..00be8ee 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -14,7 +14,7 @@ def ALU     : FuncUnit;
 def IMULDIV : FuncUnit;
 
 //===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Mips 
+// Instruction Itinerary classes used for Mips
 //===----------------------------------------------------------------------===//
 def IIAlu              : InstrItinClass;
 def IILoad             : InstrItinClass;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index db114da..70747f5d 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -17,7 +17,7 @@
 using namespace llvm;
 
 MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
-                             bool little) : 
+                             bool little) :
   MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
   IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
   HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
@@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
   if (TT.find("linux") == std::string::npos)
     IsLinux = false;
 
-  // When only the target triple is specified and is 
+  // When only the target triple is specified and is
   // a allegrex target, set the features. We also match
   // big and little endian allegrex cores (dont really
   // know if a big one exists)
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index e4f4b33..096bbed 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -26,7 +26,7 @@ class MipsSubtarget : public TargetSubtarget {
 public:
   enum MipsABIEnum {
     O32, O64, N32, N64, EABI
-  }; 
+  };
 
 protected:
 
@@ -34,10 +34,10 @@ protected:
     Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
   };
 
-  // Mips architecture version 
+  // Mips architecture version
   MipsArchEnum MipsArchVersion;
 
-  // Mips supported ABIs 
+  // Mips supported ABIs
   MipsABIEnum MipsABI;
 
   // IsLittle - The target is Little Endian
@@ -61,14 +61,14 @@ protected:
   bool IsLinux;
 
   /// Features related to the presence of specific instructions.
-  
+
   // HasSEInReg - SEB and SEH (signext in register) instructions.
   bool HasSEInReg;
 
   // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
   bool HasCondMov;
 
-  // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu) 
+  // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
   // instructions.
   bool HasMulDivAdd;
 
@@ -93,14 +93,14 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   MipsSubtarget(const std::string &TT, const std::string &FS, bool little);
-  
-  /// ParseSubtargetFeatures - Parses features string setting specified 
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
   std::string ParseSubtargetFeatures(const std::string &FS,
                                      const std::string &CPU);
 
   bool isMips1() const { return MipsArchVersion == Mips1; }
-  bool isMips32() const { return MipsArchVersion >= Mips32; } 
+  bool isMips32() const { return MipsArchVersion >= Mips32; }
   bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
 
   bool isLittle() const { return IsLittle; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 7a2dd1f..53190b4 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -75,3 +75,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
   PM.add(createMipsDelaySlotFillerPass(*this));
   return true;
 }
+
+bool MipsTargetMachine::
+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+  PM.add(createMipsExpandPseudoPass(*this));
+  return true;
+}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 43ab798..badb652 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -63,6 +63,7 @@ namespace llvm {
                                  CodeGenOpt::Level OptLevel);
     virtual bool addPreEmitPass(PassManagerBase &PM,
                                 CodeGenOpt::Level OptLevel);
+    virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
   };
 
 /// MipselTargetMachine - Mipsel target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 237b160..c394a9d 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -18,22 +18,22 @@ namespace llvm {
     const MCSection *SmallDataSection;
     const MCSection *SmallBSSSection;
   public:
-    
+
     void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
-    
+
     /// IsGlobalInSmallSection - Return true if this global address should be
     /// placed into small data/bss section.
     bool IsGlobalInSmallSection(const GlobalValue *GV,
                                 const TargetMachine &TM, SectionKind Kind)const;
     bool IsGlobalInSmallSection(const GlobalValue *GV,
-                                const TargetMachine &TM) const;  
-    
+                                const TargetMachine &TM) const;
+
     const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
                                             SectionKind Kind,
                                             Mangler *Mang,
                                             const TargetMachine &TM) const;
-      
+
     // TODO: Classify globals as mips wishes.
   };
 } // end namespace llvm
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index cc3d61e..a8d6fe9 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -14,7 +14,7 @@ using namespace llvm;
 
 Target llvm::TheMipsTarget, llvm::TheMipselTarget;
 
-extern "C" void LLVMInitializeMipsTargetInfo() { 
+extern "C" void LLVMInitializeMipsTargetInfo() {
   RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
 
   RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 19385ba..ec2be92 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -29,6 +29,11 @@ namespace llvm {
       PARAMETER = 3,
       SHARED = 4
     };
+
+    enum Predicate {
+      PRED_NORMAL = 0,
+      PRED_NEGATE = 1
+    };
   } // namespace PTX
 
   FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
@@ -37,7 +42,8 @@ namespace llvm {
   FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel);
 
-  extern Target ThePTXTarget;
+  extern Target ThePTX32Target;
+  extern Target ThePTX64Target;
 } // namespace llvm;
 
 // Defines symbolic names for PTX registers.
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 8b1a1b1..ae8326e 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -19,8 +19,35 @@ include "llvm/Target/Target.td"
 // Subtarget Features.
 //===----------------------------------------------------------------------===//
 
-def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
-                                   "Enable sm_20 target architecture">;
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+                                     "Do not demote .f64 to .f32">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+                                    "Use PTX Language Version 2.0",
+                                    []>;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+                                    "Use PTX Language Version 2.1",
+                                    [FeaturePTX20]>;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+                                    "Use PTX Language Version 2.2",
+                                    [FeaturePTX21]>;
+
+//===- PTX Shader Model ---------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
+                                   "Enable Shader Model 1.0 compliance">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
+                                   "Enable Shader Model 1.3 compliance",
+                                   [FeatureSM10, FeatureDouble]>;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
+                                   "Enable Shader Model 2.0 compliance",
+                                   [FeatureSM13]>;
 
 //===----------------------------------------------------------------------===//
 // PTX supported processors.
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index a605997..29c4781 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
@@ -37,13 +38,6 @@
 
 using namespace llvm;
 
-static cl::opt<std::string>
-OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
-
-static cl::opt<std::string>
-OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
-             cl::init("sm_10"));
-
 namespace {
 class PTXAsmPrinter : public AsmPrinter {
 public:
@@ -68,6 +62,7 @@ public:
                        const char *Modifier = 0);
   void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
                          const char *Modifier = 0);
+  void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
 
   // autogen'd.
   void printInstruction(const MachineInstr *MI, raw_ostream &OS);
@@ -82,27 +77,20 @@ private:
 static const char PARAM_PREFIX[] = "__param_";
 
 static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
+#define TEST_REGCLS(cls, clsstr)                \
   if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
-  TEST_REGCLS(RRegs32, s32);
   TEST_REGCLS(Preds, pred);
+  TEST_REGCLS(RRegu16, u16);
+  TEST_REGCLS(RRegu32, u32);
+  TEST_REGCLS(RRegu64, u64);
+  TEST_REGCLS(RRegf32, f32);
+  TEST_REGCLS(RRegf64, f64);
 #undef TEST_REGCLS
 
   llvm_unreachable("Not in any register class!");
   return NULL;
 }
 
-static const char *getInstructionTypeName(const MachineInstr *MI) {
-  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.getType() == MachineOperand::MO_Register)
-      return getRegisterTypeName(MO.getReg());
-  }
-
-  llvm_unreachable("No reg operand found in instruction!");
-  return NULL;
-}
-
 static const char *getStateSpaceName(unsigned addressSpace) {
   switch (addressSpace) {
   default: llvm_unreachable("Unknown state space");
@@ -115,6 +103,28 @@ static const char *getStateSpaceName(unsigned addressSpace) {
   return NULL;
 }
 
+static const char *getTypeName(const Type* type) {
+  while (true) {
+    switch (type->getTypeID()) {
+      default: llvm_unreachable("Unknown type");
+      case Type::FloatTyID: return ".f32";
+      case Type::DoubleTyID: return ".f64";
+      case Type::IntegerTyID:
+        switch (type->getPrimitiveSizeInBits()) {
+          default: llvm_unreachable("Unknown integer bit-width");
+          case 16: return ".u16";
+          case 32: return ".u32";
+          case 64: return ".u64";
+        }
+      case Type::ArrayTyID:
+      case Type::PointerTyID:
+        type = dyn_cast<const SequentialType>(type)->getElementType();
+        break;
+    }
+  }
+  return NULL;
+}
+
 bool PTXAsmPrinter::doFinalization(Module &M) {
   // XXX Temproarily remove global variables so that doFinalization() will not
   // emit them again (global variables are emitted at beginning).
@@ -146,8 +156,12 @@ bool PTXAsmPrinter::doFinalization(Module &M) {
 
 void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
 {
-  OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
-  OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+  const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+  OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+  OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+                                (ST.supportsDouble() ? ""
+                                                     : ", map_f64_to_f32")));
   OutStreamer.AddBlankLine();
 
   // declare global variables
@@ -186,17 +200,16 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   std::string str;
   str.reserve(64);
 
-  // Write instruction to str
   raw_string_ostream OS(str);
+
+  // Emit predicate
+  printPredicateOperand(MI, OS);
+
+  // Write instruction to str
   printInstruction(MI, OS);
   OS << ';';
   OS.flush();
 
-  // Replace "%type" if found
-  size_t pos;
-  if ((pos = str.find("%type")) != std::string::npos)
-    str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI));
-
   StringRef strref = StringRef(str);
   OutStreamer.EmitRawText(strref);
 }
@@ -213,11 +226,36 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       OS << *Mang->getSymbol(MO.getGlobal());
       break;
     case MachineOperand::MO_Immediate:
-      OS << (int) MO.getImm();
+      OS << (long) MO.getImm();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      OS << *MO.getMBB()->getSymbol();
       break;
     case MachineOperand::MO_Register:
       OS << getRegisterName(MO.getReg());
       break;
+    case MachineOperand::MO_FPImmediate:
+      APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+      bool  isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+      // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+      if (isFloat) {
+        OS << "0F";
+      }
+      else {
+        OS << "0D";
+      }
+      // Emit the encoded floating-point value.
+      if (constFP.getZExtValue() > 0) {
+        OS << constFP.toString(16, false);
+      }
+      else {
+        OS << "00000000";
+        // If We have a double-precision zero, pad to 8-bytes.
+        if (!isFloat) {
+          OS << "00000000";
+        }
+      }
+      break;
   }
 }
 
@@ -265,13 +303,77 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
     decl += " ";
   }
 
-  // TODO: add types
-  decl += ".s32 ";
 
-  decl += gvsym->getName();
+  if (PointerType::classof(gv->getType())) {
+    const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
+    const Type* elementTy = pointerTy->getElementType();
+
+    decl += ".b8 ";
+    decl += gvsym->getName();
+    decl += "[";
+    
+    if (elementTy->isArrayTy())
+    {
+      assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
 
-  if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType()))
-    decl += "[]";
+      const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+      elementTy = arrayTy->getElementType();
+
+      unsigned numElements = arrayTy->getNumElements();
+      
+      while (elementTy->isArrayTy()) {
+
+        arrayTy = dyn_cast<const ArrayType>(elementTy);
+        elementTy = arrayTy->getElementType();
+
+        numElements *= arrayTy->getNumElements();
+      }
+
+      // FIXME: isPrimitiveType() == false for i16?
+      assert(elementTy->isSingleValueType() &&
+              "Non-primitive types are not handled");
+
+      // Compute the size of the array, in bytes.
+      uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
+                        * numElements;
+  
+      decl += utostr(arraySize);
+    }
+    
+    decl += "]";
+    
+    // handle string constants (assume ConstantArray means string)
+    
+    if (gv->hasInitializer())
+    {
+      Constant *C = gv->getInitializer();  
+      if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
+      {
+        decl += " = {";
+
+        for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+        {
+          if (i > 0)   decl += ",";
+      
+          decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+        }
+      
+        decl += "}";
+      }
+    }
+  }
+  else {
+    // Note: this is currently the fall-through case and most likely generates
+    //       incorrect code.
+    decl += getTypeName(gv->getType());
+    decl += " ";
+
+    decl += gvsym->getName();
+
+    if (ArrayType::classof(gv->getType()) ||
+        PointerType::classof(gv->getType()))
+      decl += "[]";
+  }
 
   decl += ";";
 
@@ -313,16 +415,24 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
   if (!MFI->argRegEmpty()) {
     decl += " (";
     if (isKernel) {
-      for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
-        if (i != 0)
+      unsigned cnt = 0;
+      for(PTXMachineFunctionInfo::reg_iterator
+          i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+          i != e; ++i) {
+        reg = *i;
+        assert(reg != PTX::NoRegister && "Not a valid register!");
+        if (i != b)
           decl += ", ";
-        decl += ".param .s32 "; // TODO: add types
+        decl += ".param .";
+        decl += getRegisterTypeName(reg);
+        decl += " ";
         decl += PARAM_PREFIX;
-        decl += utostr(i + 1);
+        decl += utostr(++cnt);
       }
     } else {
       for (PTXMachineFunctionInfo::reg_iterator
-           i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+           i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+           i != e; ++i) {
         reg = *i;
         assert(reg != PTX::NoRegister && "Not a valid register!");
         if (i != b)
@@ -339,9 +449,29 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
   OutStreamer.EmitRawText(Twine(decl));
 }
 
+void PTXAsmPrinter::
+printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
+  int i = MI->findFirstPredOperandIdx();
+  if (i == -1)
+    llvm_unreachable("missing predicate operand");
+
+  unsigned reg = MI->getOperand(i).getReg();
+  int predOp = MI->getOperand(i+1).getImm();
+
+  DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
+
+  if (reg != PTX::NoRegister) {
+    O << '@';
+    if (predOp == PTX::PRED_NEGATE)
+      O << '!';
+    O << getRegisterName(reg);
+  }
+}
+
 #include "PTXGenAsmWriter.inc"
 
 // Force static initialization.
 extern "C" void LLVMInitializePTXAsmPrinter() {
-  RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget);
+  RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+  RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
 }
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
index 574ae7a..9320676 100644
--- a/lib/Target/PTX/PTXFrameLowering.h
+++ b/lib/Target/PTX/PTXFrameLowering.h
@@ -27,7 +27,8 @@ protected:
 
 public:
   explicit PTXFrameLowering(const PTXSubtarget &sti)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+      STI(sti) {
   }
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index efb0e8b..b3c85da 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "PTXTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -42,8 +43,14 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
   private:
     SDNode *SelectREAD_PARAM(SDNode *Node);
 
+    // We need this only because we can't match intruction BRAdp
+    // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+    SDNode *SelectBRCOND(SDNode *Node);
+
     bool isImm(const SDValue &operand);
     bool SelectImm(const SDValue &operand, SDValue &imm);
+
+    const PTXSubtarget& getSubtarget() const;
 }; // class PTXDAGToDAGISel
 } // namespace
 
@@ -59,21 +66,62 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
   : SelectionDAGISel(TM, OptLevel) {}
 
 SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
-  if (Node->getOpcode() == PTXISD::READ_PARAM)
-    return SelectREAD_PARAM(Node);
-  else
-    return SelectCode(Node);
+  switch (Node->getOpcode()) {
+    case PTXISD::READ_PARAM:
+      return SelectREAD_PARAM(Node);
+    case ISD::BRCOND:
+      return SelectBRCOND(Node);
+    default:
+      return SelectCode(Node);
+  }
 }
 
 SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
-  SDValue index = Node->getOperand(1);
-  DebugLoc dl = Node->getDebugLoc();
+  SDValue  index = Node->getOperand(1);
+  DebugLoc dl    = Node->getDebugLoc();
+  unsigned opcode;
 
   if (index.getOpcode() != ISD::TargetConstant)
     llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
 
+  if (Node->getValueType(0) == MVT::i16) {
+    opcode = PTX::LDpiU16;
+  }
+  else if (Node->getValueType(0) == MVT::i32) {
+    opcode = PTX::LDpiU32;
+  }
+  else if (Node->getValueType(0) == MVT::i64) {
+    opcode = PTX::LDpiU64;
+  }
+  else if (Node->getValueType(0) == MVT::f32) {
+    opcode = PTX::LDpiF32;
+  }
+  else if (Node->getValueType(0) == MVT::f64) {
+    opcode = PTX::LDpiF64;
+  }
+  else {
+    llvm_unreachable("Unknown parameter type for ld.param");
+  }
+
   return PTXInstrInfo::
-    GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+    GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+  assert(Node->getNumOperands() >= 3);
+
+  SDValue Chain  = Node->getOperand(0);
+  SDValue Pred   = Node->getOperand(1);
+  SDValue Target = Node->getOperand(2); // branch target
+  SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+  DebugLoc dl = Node->getDebugLoc();
+
+  assert(Target.getOpcode()  == ISD::BasicBlock);
+  assert(Pred.getValueType() == MVT::i1);
+
+  // Emit BRAdp
+  SDValue Ops[] = { Target, Pred, PredOp, Chain };
+  return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
 }
 
 // Match memory operand of the form [reg+reg]
@@ -82,8 +130,11 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
       isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
     return false;
 
+  assert(Addr.getValueType().isSimple() && "Type must be simple");
+
   R1 = Addr;
-  R2 = CurDAG->getTargetConstant(0, MVT::i32);
+  R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
   return true;
 }
 
@@ -95,8 +146,12 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
     if (isImm(Addr))
       return false;
     // it is [reg]
+
+    assert(Addr.getValueType().isSimple() && "Type must be simple");
+
     Base = Addr;
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
     return true;
   }
 
@@ -129,7 +184,10 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
 
   // is [imm]?
   if (SelectImm(Addr, Base)) {
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
     return true;
   }
 
@@ -146,6 +204,13 @@ bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
     return false;
 
   ConstantSDNode *CN = cast<ConstantSDNode>(node);
-  imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32);
+  imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+                                  operand.getValueType());
   return true;
 }
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+  return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e6d4490..23b93da 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -27,21 +28,60 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the register classes.
   addRegisterClass(MVT::i1,  PTX::PredsRegisterClass);
-  addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
-
+  addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
+  addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
+  addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
+  addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
+  addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+
+  setBooleanContents(ZeroOrOneBooleanContent);
+  
   setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
 
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  
+  // Turn i16 (z)extload into load + (z)extend
+  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+  // Turn f32 extload into load + fextend
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  
+  // Turn f64 truncstore into trunc + store.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  
   // Customize translation of memory addresses
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+  // Expand BR_CC into BRCOND
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+  // Expand SELECT_CC into SETCC
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+  
+  // need to lower SETCC of Preds into bitwise logic
+  setOperationAction(ISD::SETCC, MVT::i1, Custom);
+  
   // Compute derived properties from the register classes
   computeRegisterProperties();
 }
 
+MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+  return MVT::i1;
+}
+
 SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
-    default:                 llvm_unreachable("Unimplemented operand");
-    case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+    default:
+      llvm_unreachable("Unimplemented operand");
+    case ISD::SETCC:
+      return LowerSETCC(Op, DAG);
+    case ISD::GlobalAddress:
+      return LowerGlobalAddress(Op, DAG);
   }
 }
 
@@ -49,6 +89,8 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
     default:
       llvm_unreachable("Unknown opcode");
+    case PTXISD::COPY_ADDRESS:
+      return "PTXISD::COPY_ADDRESS";
     case PTXISD::READ_PARAM:
       return "PTXISD::READ_PARAM";
     case PTXISD::EXIT:
@@ -62,12 +104,43 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
 //                      Custom Lower Operation
 //===----------------------------------------------------------------------===//
 
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  
+  // Look for X == 0, X == 1, X != 0, or X != 1  
+  // We can simplify these to bitwise logic
+  
+  if (Op1.getOpcode() == ISD::Constant &&
+      (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+       cast<ConstantSDNode>(Op1)->isNullValue()) &&
+      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+	  return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+  }
+  
+  return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
 SDValue PTXTargetLowering::
 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  return DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+
+  assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+  SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+  SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+                                 dl,
+                                 PtrVT.getSimpleVT(),
+                                 targetGlobal);
+
+  return movInstr;
 }
 
 //===----------------------------------------------------------------------===//
@@ -87,9 +160,13 @@ struct argmap_entry {
   bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
 } argmap[] = {
   argmap_entry(MVT::i1,  PTX::PredsRegisterClass),
-  argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
+  argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
+  argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
+  argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
+  argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
+  argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
 };
-} // end anonymous namespace
+}                               // end anonymous namespace
 
 SDValue PTXTargetLowering::
   LowerFormalArguments(SDValue Chain,
@@ -185,10 +262,25 @@ SDValue PTXTargetLowering::
   if (Outs.size() == 0)
     return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
 
-  assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
-
   SDValue Flag;
-  unsigned reg = PTX::R0;
+  unsigned reg;
+
+  if (Outs[0].VT == MVT::i16) {
+    reg = PTX::RH0;
+  }
+  else if (Outs[0].VT == MVT::i32) {
+    reg = PTX::R0;
+  }
+  else if (Outs[0].VT == MVT::i64) {
+    reg = PTX::RD0;
+  }
+  else if (Outs[0].VT == MVT::f32) {
+    reg = PTX::F0;
+  }
+  else {
+    assert(Outs[0].VT == MVT::f64 && "Can return only basic types");
+    reg = PTX::FD0;
+  }
 
   MachineFunction &MF = DAG.getMachineFunction();
   PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index b03a9f6..6a7e3e6 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -26,7 +26,8 @@ namespace PTXISD {
     FIRST_NUMBER = ISD::BUILTIN_OP_END,
     READ_PARAM,
     EXIT,
-    RET
+    RET,
+    COPY_ADDRESS
   };
 } // namespace PTXISD
 
@@ -41,6 +42,8 @@ class PTXTargetLowering : public TargetLowering {
 
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
+    virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+    
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv,
@@ -58,7 +61,9 @@ class PTXTargetLowering : public TargetLowering {
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl,
                   SelectionDAG &DAG) const;
-
+    
+    virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+    
   private:
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
 }; // class PTXTargetLowering
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 805759b..a12a6d0 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -11,9 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "ptx-instrinfo"
+
 #include "PTX.h"
 #include "PTXInstrInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -27,20 +33,27 @@ static const struct map_entry {
   const TargetRegisterClass *cls;
   const int opcode;
 } map[] = {
-  { &PTX::RRegs32RegClass, PTX::MOVrr },
-  { &PTX::PredsRegClass,   PTX::MOVpp }
+  { &PTX::RRegu16RegClass, PTX::MOVU16rr },
+  { &PTX::RRegu32RegClass, PTX::MOVU32rr },
+  { &PTX::RRegu64RegClass, PTX::MOVU64rr },
+  { &PTX::RRegf32RegClass, PTX::MOVF32rr },
+  { &PTX::RRegf64RegClass, PTX::MOVF64rr },
+  { &PTX::PredsRegClass,   PTX::MOVPREDrr }
 };
 
 void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DstReg, unsigned SrcReg,
                                bool KillSrc) const {
-  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
-    if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
-      BuildMI(MBB, I, DL,
-              get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
+  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
+    if (map[i].cls->contains(DstReg, SrcReg)) {
+      const TargetInstrDesc &TID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).
+        addReg(SrcReg, getKillRegState(KillSrc));
+      AddDefaultPredicate(MI);
       return;
     }
+  }
 
   llvm_unreachable("Impossible reg-to-reg copy");
 }
@@ -56,12 +69,9 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
 
   for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
     if (DstRC == map[i].cls) {
-      MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode),
-                                 DstReg).addReg(SrcReg);
-      if (MI->findFirstPredOperandIdx() == -1) {
-        MI->addOperand(MachineOperand::CreateReg(0, false));
-        MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0));
-      }
+      const TargetInstrDesc &TID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg);
+      AddDefaultPredicate(MI);
       return true;
     }
 
@@ -74,8 +84,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
   switch (MI.getOpcode()) {
     default:
       return false;
-    case PTX::MOVpp:
-    case PTX::MOVrr:
+    case PTX::MOVU16rr:
+    case PTX::MOVU32rr:
+    case PTX::MOVU64rr:
+    case PTX::MOVF32rr:
+    case PTX::MOVF64rr:
+    case PTX::MOVPREDrr:
       assert(MI.getNumOperands() >= 2 &&
              MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
              "Invalid register-register move instruction");
@@ -85,3 +99,239 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
       return true;
   }
 }
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+  int i = MI->findFirstPredOperandIdx();
+  return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  if (Pred.size() < 2)
+    llvm_unreachable("lesser than 2 predicate operands are provided");
+
+  int i = MI->findFirstPredOperandIdx();
+  if (i == -1)
+    llvm_unreachable("missing predicate operand");
+
+  MI->getOperand(i).setReg(Pred[0].getReg());
+  MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+  return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                  const SmallVectorImpl<MachineOperand> &Pred2) const {
+  const MachineOperand &PredReg1 = Pred1[0];
+  const MachineOperand &PredReg2 = Pred2[0];
+  if (PredReg1.getReg() != PredReg2.getReg())
+    return false;
+
+  const MachineOperand &PredOp1 = Pred1[1];
+  const MachineOperand &PredOp2 = Pred2[1];
+  if (PredOp1.getImm() != PredOp2.getImm())
+    return false;
+
+  return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+                 std::vector<MachineOperand> &Pred) const {
+  // If an instruction sets a predicate register, it defines a predicate.
+
+  // TODO supprot 5-operand format of setp instruction
+
+  if (MI->getNumOperands() < 1)
+    return false;
+
+  const MachineOperand &MO = MI->getOperand(0);
+
+  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass)
+    return false;
+
+  Pred.push_back(MO);
+  Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+  return true;
+}
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+              MachineBasicBlock *&TBB,
+              MachineBasicBlock *&FBB,
+              SmallVectorImpl<MachineOperand> &Cond,
+              bool AllowModify) const {
+  // TODO implement cases when AllowModify is true
+
+  if (MBB.empty())
+    return true;
+
+  MachineBasicBlock::const_iterator iter = MBB.end();
+  const MachineInstr& instLast1 = *--iter;
+  const TargetInstrDesc &desc1 = instLast1.getDesc();
+  // for special case that MBB has only 1 instruction
+  const bool IsSizeOne = MBB.size() == 1;
+  // if IsSizeOne is true, *--iter and instLast2 are invalid
+  // we put a dummy value in instLast2 and desc2 since they are used
+  const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+  const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+
+  DEBUG(dbgs() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: MBB:    " << MBB.getName().str() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: TBB:    " << TBB << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: FBB:    " << FBB << "\n");
+
+  // this block ends with no branches
+  if (!IsAnyKindOfBranch(instLast1)) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+    return false;
+  }
+
+  // this block ends with only an unconditional branch
+  if (desc1.isUnconditionalBranch() &&
+      // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
+      (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+    TBB = GetBranchTarget(instLast1);
+    return false;
+  }
+
+  // this block ends with a conditional branch and
+  // it falls through to a successor block
+  if (desc1.isConditionalBranch() &&
+      IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+    TBB = GetBranchTarget(instLast1);
+    int i = instLast1.findFirstPredOperandIdx();
+    Cond.push_back(instLast1.getOperand(i));
+    Cond.push_back(instLast1.getOperand(i+1));
+    return false;
+  }
+
+  // when IsSizeOne is true, we are done
+  if (IsSizeOne)
+    return true;
+
+  // this block ends with a conditional branch
+  // followed by an unconditional branch
+  if (desc2.isConditionalBranch() &&
+      desc1.isUnconditionalBranch()) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+    TBB = GetBranchTarget(instLast2);
+    FBB = GetBranchTarget(instLast1);
+    int i = instLast2.findFirstPredOperandIdx();
+    Cond.push_back(instLast2.getOperand(i));
+    Cond.push_back(instLast2.getOperand(i+1));
+    return false;
+  }
+
+  // branch cannot be understood
+  DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+  return true;
+}
+
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  unsigned count = 0;
+  while (!MBB.empty())
+    if (IsAnyKindOfBranch(MBB.back())) {
+      MBB.pop_back();
+      ++count;
+    } else
+      break;
+  DEBUG(dbgs() << "RemoveBranch: MBB:   " << MBB.getName().str() << "\n");
+  DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+  return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+             MachineBasicBlock *TBB,
+             MachineBasicBlock *FBB,
+             const SmallVectorImpl<MachineOperand> &Cond,
+             DebugLoc DL) const {
+  DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+  DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+                        << "\n";
+        else     dbgs() << "InsertBranch: TBB: (NULL)\n");
+  DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+                        << "\n";
+        else     dbgs() << "InsertBranch: FBB: (NULL)\n");
+  DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+  assert(TBB && "TBB is NULL");
+
+  if (FBB) {
+    BuildMI(&MBB, DL, get(PTX::BRAdp))
+      .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+    BuildMI(&MBB, DL, get(PTX::BRAd))
+      .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+    return 2;
+  } else if (Cond.size()) {
+    BuildMI(&MBB, DL, get(PTX::BRAdp))
+      .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+    return 1;
+  } else {
+    BuildMI(&MBB, DL, get(PTX::BRAd))
+      .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+    return 1;
+  }
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                  DebugLoc dl, EVT VT, SDValue Op1) {
+  SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+  SDValue ops[] = { Op1, predReg, predOp };
+  return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                  DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+  SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+  SDValue ops[] = { Op1, Op2, predReg, predOp };
+  return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+  if (MI->findFirstPredOperandIdx() == -1) {
+    MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+    MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+  }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+  const TargetInstrDesc &desc = inst.getDesc();
+  return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+  for (MachineBasicBlock::const_succ_iterator
+      i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+    if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+      return true;
+  return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+  // FIXME So far all branch instructions put destination in 1st operand
+  const MachineOperand& target = inst.getOperand(0);
+  assert(target.isMBB() && "FIXME: detect branch target operand");
+  return target.getMBB();
+}
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index e7f00f0..a04be77 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -15,61 +15,93 @@
 #define PTX_INSTR_INFO_H
 
 #include "PTXRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
 namespace llvm {
 class PTXTargetMachine;
 
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
 class PTXInstrInfo : public TargetInstrInfoImpl {
-  private:
-    const PTXRegisterInfo RI;
-    PTXTargetMachine &TM;
-
-  public:
-    explicit PTXInstrInfo(PTXTargetMachine &_TM);
-
-    virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
-
-    virtual void copyPhysReg(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator I, DebugLoc DL,
-                             unsigned DstReg, unsigned SrcReg,
-                             bool KillSrc) const;
-
-    virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator I,
-                              unsigned DstReg, unsigned SrcReg,
-                              const TargetRegisterClass *DstRC,
-                              const TargetRegisterClass *SrcRC,
-                              DebugLoc DL) const;
-
-    virtual bool isMoveInstr(const MachineInstr& MI,
-                             unsigned &SrcReg, unsigned &DstReg,
-                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
-    // static helper routines
-
-    static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
-                                            DebugLoc dl, EVT VT,
-                                            SDValue Op1) {
-      SDValue pred_reg = DAG->getRegister(0, MVT::i1);
-      SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
-      SDValue ops[] = { Op1, pred_reg, pred_imm };
-      return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
-    }
-
-    static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
-                                            DebugLoc dl, EVT VT,
-                                            SDValue Op1,
-                                            SDValue Op2) {
-      SDValue pred_reg = DAG->getRegister(0, MVT::i1);
-      SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
-      SDValue ops[] = { Op1, Op2, pred_reg, pred_imm };
-      return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
-    }
-
-  }; // class PTXInstrInfo
+private:
+  const PTXRegisterInfo RI;
+  PTXTargetMachine &TM;
+
+public:
+  explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+  virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, DebugLoc DL,
+                           unsigned DstReg, unsigned SrcReg,
+                           bool KillSrc) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DstReg, unsigned SrcReg,
+                            const TargetRegisterClass *DstRC,
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
+
+  virtual bool isMoveInstr(const MachineInstr& MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  // predicate support
+
+  virtual bool isPredicated(const MachineInstr *MI) const;
+
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  // PTX is fully-predicable
+  virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+  // branch support
+
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify = false) const;
+
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+
+  // static helper routines
+
+  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                          DebugLoc dl, EVT VT,
+                                          SDValue Op1);
+
+  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                          DebugLoc dl, EVT VT,
+                                          SDValue Op1, SDValue Op2);
+
+  static void AddDefaultPredicate(MachineInstr *MI);
+
+  static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+  static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+  static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
 } // namespace llvm
 
 #endif // PTX_INSTR_INFO_H
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 9a74778..1ac9d3f 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -18,6 +18,26 @@
 include "PTXInstrFormats.td"
 
 //===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Addressing
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+// Shader Model Support
+def SupportsSM13       : Predicate<"getSubtarget().supportsSM13()">;
+def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
+def SupportsSM20       : Predicate<"getSubtarget().supportsSM20()">;
+def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+
+// PTX Version Support
+def SupportsPTX21       : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22       : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+
+//===----------------------------------------------------------------------===//
 // Instruction Pattern Stuff
 //===----------------------------------------------------------------------===//
 
@@ -107,24 +127,41 @@ def store_shared
 }]>;
 
 // Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
 
 // Address operands
-def MEMri : Operand<i32> {
+def MEMri32 : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops RRegu32, i32imm);
+}
+def MEMri64 : Operand<i64> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops RRegs32, i32imm);
+  let MIOperandInfo = (ops RRegu64, i64imm);
 }
-def MEMii : Operand<i32> {
+def MEMii32 : Operand<i32> {
   let PrintMethod = "printMemOperand";
   let MIOperandInfo = (ops i32imm, i32imm);
 }
+def MEMii64 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
 def MEMpi : Operand<i32> {
   let PrintMethod = "printParamOperand";
   let MIOperandInfo = (ops i32imm);
 }
 
+// Branch & call targets have OtherVT type.
+def brtarget   : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
 //===----------------------------------------------------------------------===//
 // PTX Specific Node Definitions
 //===----------------------------------------------------------------------===//
@@ -138,66 +175,389 @@ def PTXexit
   : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
 def PTXret
   : SDNode<"PTXISD::RET",  SDTNone, [SDNPHasChain]>;
+def PTXcopyaddress
+  : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
 //===----------------------------------------------------------------------===//
 
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+  def rr32 : InstPTX<(outs RRegf32:$d),
+                     (ins RRegf32:$a),
+                     !strconcat(opcstr, ".f32\t$d, $a"),
+                     [(set RRegf32:$d, (opnode RRegf32:$a))]>;
+  def ri32 : InstPTX<(outs RRegf32:$d),
+                     (ins f32imm:$a),
+                     !strconcat(opcstr, ".f32\t$d, $a"),
+                     [(set RRegf32:$d, (opnode fpimm:$a))]>;
+  def rr64 : InstPTX<(outs RRegf64:$d),
+                     (ins RRegf64:$a),
+                     !strconcat(opcstr, ".f64\t$d, $a"),
+                     [(set RRegf64:$d, (opnode RRegf64:$a))]>;
+  def ri64 : InstPTX<(outs RRegf64:$d),
+                     (ins f64imm:$a),
+                     !strconcat(opcstr, ".f64\t$d, $a"),
+                     [(set RRegf64:$d, (opnode fpimm:$a))]>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+  def rr32 : InstPTX<(outs RRegf32:$d),
+                     (ins RRegf32:$a, RRegf32:$b),
+                     !strconcat(opcstr, ".f32\t$d, $a, $b"),
+                     [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
+  def ri32 : InstPTX<(outs RRegf32:$d),
+                     (ins RRegf32:$a, f32imm:$b),
+                     !strconcat(opcstr, ".f32\t$d, $a, $b"),
+                     [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+  def rr64 : InstPTX<(outs RRegf64:$d),
+                     (ins RRegf64:$a, RRegf64:$b),
+                     !strconcat(opcstr, ".f64\t$d, $a, $b"),
+                     [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
+  def ri64 : InstPTX<(outs RRegf64:$d),
+                     (ins RRegf64:$a, f64imm:$b),
+                     !strconcat(opcstr, ".f64\t$d, $a, $b"),
+                     [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
+}
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+  def rrr32 : InstPTX<(outs RRegf32:$d),
+                      (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+                      !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+                                                          RRegf32:$b),
+                                                 RRegf32:$c))]>;
+  def rri32 : InstPTX<(outs RRegf32:$d),
+                      (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+                      !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+                                                          RRegf32:$b),
+                                                 fpimm:$c))]>;
+  def rrr64 : InstPTX<(outs RRegf64:$d),
+                      (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+                      !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+                                                          RRegf64:$b),
+                                                 RRegf64:$c))]>;
+  def rri64 : InstPTX<(outs RRegf64:$d),
+                      (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+                      !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+                                                          RRegf64:$b),
+                                                 fpimm:$c))]>;
+}
+
 multiclass INT3<string opcstr, SDNode opnode> {
-  def rr : InstPTX<(outs RRegs32:$d),
-                   (ins RRegs32:$a, RRegs32:$b),
-                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
-                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
-  def ri : InstPTX<(outs RRegs32:$d),
-                   (ins RRegs32:$a, i32imm:$b),
-                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
-                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+  def rr16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, RRegu16:$b),
+                     !strconcat(opcstr, ".u16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+  def ri16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, i16imm:$b),
+                     !strconcat(opcstr, ".u16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+  def rr32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, RRegu32:$b),
+                     !strconcat(opcstr, ".u32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+  def ri32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, i32imm:$b),
+                     !strconcat(opcstr, ".u32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+  def rr64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, RRegu64:$b),
+                     !strconcat(opcstr, ".u64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+  def ri64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, i64imm:$b),
+                     !strconcat(opcstr, ".u64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+}
+
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+  def ripreds : InstPTX<(outs Preds:$d),
+                     (ins Preds:$a, i1imm:$b),
+                     !strconcat(opcstr, ".pred\t$d, $a, $b"),
+                     [(set Preds:$d, (opnode Preds:$a, imm:$b))]>;
+  def rrpreds : InstPTX<(outs Preds:$d),
+                     (ins Preds:$a, Preds:$b),
+                     !strconcat(opcstr, ".pred\t$d, $a, $b"),
+                     [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>;
+  def rr16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, RRegu16:$b),
+                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+  def ri16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, i16imm:$b),
+                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+  def rr32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, RRegu32:$b),
+                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+  def ri32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, i32imm:$b),
+                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+  def rr64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, RRegu64:$b),
+                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+  def ri64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, i64imm:$b),
+                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
 }
 
-// no %type directive, non-communtable
 multiclass INT3ntnc<string opcstr, SDNode opnode> {
-  def rr : InstPTX<(outs RRegs32:$d),
-                   (ins RRegs32:$a, RRegs32:$b),
-                   !strconcat(opcstr, "\t$d, $a, $b"),
-                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
-  def ri : InstPTX<(outs RRegs32:$d),
-                   (ins RRegs32:$a, i32imm:$b),
-                   !strconcat(opcstr, "\t$d, $a, $b"),
-                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
-  def ir : InstPTX<(outs RRegs32:$d),
-                   (ins i32imm:$a, RRegs32:$b),
-                   !strconcat(opcstr, "\t$d, $a, $b"),
-                   [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+  def rr16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, RRegu16:$b),
+                     !strconcat(opcstr, "16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+  def rr32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, RRegu32:$b),
+                     !strconcat(opcstr, "32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+  def rr64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, RRegu64:$b),
+                     !strconcat(opcstr, "64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+  def ri16 : InstPTX<(outs RRegu16:$d),
+                     (ins RRegu16:$a, i16imm:$b),
+                     !strconcat(opcstr, "16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+  def ri32 : InstPTX<(outs RRegu32:$d),
+                     (ins RRegu32:$a, i32imm:$b),
+                     !strconcat(opcstr, "32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+  def ri64 : InstPTX<(outs RRegu64:$d),
+                     (ins RRegu64:$a, i64imm:$b),
+                     !strconcat(opcstr, "64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+  def ir16 : InstPTX<(outs RRegu16:$d),
+                     (ins i16imm:$a, RRegu16:$b),
+                     !strconcat(opcstr, "16\t$d, $a, $b"),
+                     [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>;
+  def ir32 : InstPTX<(outs RRegu32:$d),
+                     (ins i32imm:$a, RRegu32:$b),
+                     !strconcat(opcstr, "32\t$d, $a, $b"),
+                     [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
+  def ir64 : InstPTX<(outs RRegu64:$d),
+                     (ins i64imm:$a, RRegu64:$b),
+                     !strconcat(opcstr, "64\t$d, $a, $b"),
+                     [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
 }
 
-multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
-  def rr : InstPTX<(outs RC:$d),
-                   (ins MEMri:$a),
-                   !strconcat(opstr, ".%type\t$d, [$a]"),
-                   [(set RC:$d, (pat_load ADDRrr:$a))]>;
-  def ri : InstPTX<(outs RC:$d),
-                   (ins MEMri:$a),
-                   !strconcat(opstr, ".%type\t$d, [$a]"),
-                   [(set RC:$d, (pat_load ADDRri:$a))]>;
-  def ii : InstPTX<(outs RC:$d),
-                   (ins MEMii:$a),
-                   !strconcat(opstr, ".%type\t$d, [$a]"),
-                   [(set RC:$d, (pat_load ADDRii:$a))]>;
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+                        CondCode cmp, string cmpstr> {
+  // TODO support 5-operand format: p|q, a, b, c
+
+  def rr
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+              [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
+  def ri
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
+              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+              [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+  def rr_and_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+  def ri_and_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+  def rr_or_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+  def ri_or_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+  def rr_xor_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+  def ri_xor_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+
+  def rr_and_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+  def ri_and_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+  def rr_or_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+  def ri_or_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+  def rr_xor_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+  def ri_xor_not_r
+    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
 }
 
-multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
-  def rr : InstPTX<(outs),
-                   (ins RC:$d, MEMri:$a),
-                   !strconcat(opstr, ".%type\t[$a], $d"),
-                   [(pat_store RC:$d, ADDRrr:$a)]>;
-  def ri : InstPTX<(outs),
-                   (ins RC:$d, MEMri:$a),
-                   !strconcat(opstr, ".%type\t[$a], $d"),
-                   [(pat_store RC:$d, ADDRri:$a)]>;
-  def ii : InstPTX<(outs),
-                   (ins RC:$d, MEMii:$a),
-                   !strconcat(opstr, ".%type\t[$a], $d"),
-                   [(pat_store RC:$d, ADDRii:$a)]>;
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+                        CondCode ucmp, CondCode ocmp, string cmpstr> {
+  // TODO support 5-operand format: p|q, a, b, c
+
+  def rr_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+              !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+              [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+  def rr_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+              [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+  def rr_and_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+  def rr_and_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+  def rr_or_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+  def rr_or_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+  def rr_xor_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+  def rr_xor_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+  def rr_and_not_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+  def rr_and_not_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+  def rr_or_not_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+  def rr_or_not_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+  def rr_xor_not_r_u
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+  def rr_xor_not_r_o
+    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+}
+
+multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+  def rr
+    : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c),
+              !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+              [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>;
+}
+
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
+  def rr32 : InstPTX<(outs RC:$d),
+                     (ins MEMri32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
+  def rr64 : InstPTX<(outs RC:$d),
+                     (ins MEMri64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
+  def ri32 : InstPTX<(outs RC:$d),
+                     (ins MEMri32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
+  def ri64 : InstPTX<(outs RC:$d),
+                     (ins MEMri64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
+  def ii32 : InstPTX<(outs RC:$d),
+                     (ins MEMii32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
+  def ii64 : InstPTX<(outs RC:$d),
+                     (ins MEMii64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+                     [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+  defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
+  defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
+  defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
+  defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
+  defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
+  def rr32 : InstPTX<(outs),
+                     (ins RC:$d, MEMri32:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                     [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
+  def rr64 : InstPTX<(outs),
+                     (ins RC:$d, MEMri64:$a),
+                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                     [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
+  def ri32 : InstPTX<(outs),
+                   (ins RC:$d, MEMri32:$a),
+                   !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                   [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
+  def ri64 : InstPTX<(outs),
+                   (ins RC:$d, MEMri64:$a),
+                   !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                   [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
+  def ii32 : InstPTX<(outs),
+                   (ins RC:$d, MEMii32:$a),
+                   !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                   [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
+  def ii64 : InstPTX<(outs),
+                   (ins RC:$d, MEMii64:$a),
+                   !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+                   [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+  defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
+  defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
+  defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
+  defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
+  defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -208,50 +568,392 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
 
 defm ADD : INT3<"add", add>;
 defm SUB : INT3<"sub", sub>;
+defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : INT3<"div", udiv>;
+defm REM : INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// Standard Unary Operations
+defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
+
+// TODO: Allow user selection of rounding modes for fdiv.
+// For division, we need to have f32 and f64 differently.
+// For f32, we just always use .approx since it is supported on all hardware
+// for PTX 1.4+, which is our minimum target.
+def FDIVrr32 : InstPTX<(outs RRegf32:$d),
+                       (ins RRegf32:$a, RRegf32:$b),
+                       "div.approx.f32\t$d, $a, $b",
+                       [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
+def FDIVri32 : InstPTX<(outs RRegf32:$d),
+                       (ins RRegf32:$a, f32imm:$b),
+                       "div.approx.f32\t$d, $a, $b",
+                       [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
+
+// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
+def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, RRegf64:$b),
+                           "div.rn.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, f64imm:$b),
+                           "div.rn.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, RRegf64:$b),
+                           "div.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
+def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, f64imm:$b),
+                           "div.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
+
+
+
+// Multi-operation hybrid instructions
+
+// The selection of mad/fma is tricky.  In some cases, they are the *same*
+// instruction, but in other cases we may prefer one or the other.  Also,
+// different PTX versions differ on whether rounding mode flags are required.
+// In the short term, mad is supported on all PTX versions and we use a
+// default rounding mode no matter what shader model or PTX version.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+def FSQRT32 : InstPTX<(outs RRegf32:$d),
+                      (ins RRegf32:$a),
+                      "sqrt.rn.f32\t$d, $a",
+                      [(set RRegf32:$d, (fsqrt RRegf32:$a))]>;
+
+def FSQRT64 : InstPTX<(outs RRegf64:$d),
+                      (ins RRegf64:$a),
+                      "sqrt.rn.f64\t$d, $a",
+                      [(set RRegf64:$d, (fsqrt RRegf64:$a))]>;
+
+def FSIN32 : InstPTX<(outs RRegf32:$d),
+                     (ins RRegf32:$a),
+                     "sin.approx.f32\t$d, $a",
+                     [(set RRegf32:$d, (fsin RRegf32:$a))]>;
+
+def FSIN64 : InstPTX<(outs RRegf64:$d),
+                     (ins RRegf64:$a),
+                     "sin.approx.f64\t$d, $a",
+                     [(set RRegf64:$d, (fsin RRegf64:$a))]>;
+
+def FCOS32 : InstPTX<(outs RRegf32:$d),
+                     (ins RRegf32:$a),
+                     "cos.approx.f32\t$d, $a",
+                     [(set RRegf32:$d, (fcos RRegf32:$a))]>;
+
+def FCOS64 : InstPTX<(outs RRegf64:$d),
+                     (ins RRegf64:$a),
+                     "cos.approx.f64\t$d, $a",
+                     [(set RRegf64:$d, (fcos RRegf64:$a))]>;
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ,  "eq">;
+defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE,  "ne">;
+defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ,  "eq">;
+defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE,  "ne">;
+defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ,  "eq">;
+defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE,  "ne">;
+defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">;
 
 ///===- Logic and Shift Instructions --------------------------------------===//
 
-defm SHL : INT3ntnc<"shl.b32", PTXshl>;
-defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s32", PTXsra>;
+defm SHL : INT3ntnc<"shl.b", PTXshl>;
+defm SRL : INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR  : PTX_LOGIC<"or",  or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
 
 ///===- Data Movement and Conversion Instructions -------------------------===//
 
 let neverHasSideEffects = 1 in {
-  // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
-  def MOVpp
+  def MOVPREDrr
     : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
-  def MOVrr
-    : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
+  def MOVU16rr
+    : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+  def MOVU32rr
+    : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+  def MOVU64rr
+    : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+  def MOVF32rr
+    : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+  def MOVF64rr
+    : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-  def MOVpi
+  def MOVPREDri
     : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
               [(set Preds:$d, imm:$a)]>;
-  def MOVri
-    : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
-              [(set RRegs32:$d, imm:$a)]>;
+  def MOVU16ri
+    : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+              [(set RRegu16:$d, imm:$a)]>;
+  def MOVU32ri
+    : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+              [(set RRegu32:$d, imm:$a)]>;
+  def MOVU64ri
+    : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+              [(set RRegu64:$d, imm:$a)]>;
+  def MOVF32ri
+    : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+              [(set RRegf32:$d, fpimm:$a)]>;
+  def MOVF64ri
+    : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+              [(set RRegf64:$d, fpimm:$a)]>;
 }
 
-defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
-defm LDc : PTX_LD<"ld.const",  RRegs32, load_constant>;
-defm LDl : PTX_LD<"ld.local",  RRegs32, load_local>;
-defm LDp : PTX_LD<"ld.param",  RRegs32, load_parameter>;
-defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+  def MOVaddr32
+    : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+              [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+  def MOVaddr64
+    : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+              [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const",  load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local",  load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// This is a special instruction that is manually inserted for kernel parameters
+def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
+                      "ld.param.u16\t$d, [$a]", []>;
+def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
+                      "ld.param.u32\t$d, [$a]", []>;
+def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
+                      "ld.param.u64\t$d, [$a]", []>;
+def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
+                      "ld.param.f32\t$d, [$a]", []>;
+def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
+                      "ld.param.f64\t$d, [$a]", []>;
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local",  store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param",  store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param",  load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
+
+// Conversion to pred
+
+def CVT_pred_u16
+  : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
+            [(set Preds:$d, (trunc RRegu16:$a))]>;
 
-def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
-                   "ld.param.%type\t$d, [$a]", []>;
+def CVT_pred_u32
+  : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
+            [(set Preds:$d, (trunc RRegu32:$a))]>;
 
-defm STg : PTX_ST<"st.global", RRegs32, store_global>;
-defm STl : PTX_ST<"st.local",  RRegs32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm STp : PTX_ST<"st.param",  RRegs32, store_parameter>;
-defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
+def CVT_pred_u64
+  : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
+            [(set Preds:$d, (trunc RRegu64:$a))]>;
+
+def CVT_pred_f32
+  : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a",
+            [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_pred_f64
+  : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a",
+            [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u16
+
+def CVT_u16_pred
+  : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
+            [(set RRegu16:$d, (zext Preds:$a))]>;
+
+def CVT_u16_u32
+  : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
+            [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+
+def CVT_u16_u64
+  : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
+            [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u16_f32
+  : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a",
+            [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u16_f64
+  : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a",
+            [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u32
+
+def CVT_u32_pred
+  : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
+            [(set RRegu32:$d, (zext Preds:$a))]>;
+
+def CVT_u32_u16
+  : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
+            [(set RRegu32:$d, (zext RRegu16:$a))]>;
+
+def CVT_u32_u64
+  : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
+            [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u32_f32
+  : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a",
+            [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u32_f64
+  : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a",
+            [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+  : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
+            [(set RRegu64:$d, (zext Preds:$a))]>;
+
+def CVT_u64_u16
+  : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
+            [(set RRegu64:$d, (zext RRegu16:$a))]>;
+
+def CVT_u64_u32
+  : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
+            [(set RRegu64:$d, (zext RRegu32:$a))]>;
+
+def CVT_u64_f32
+  : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a",
+            [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u64_f64
+  : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a",
+            [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to f32
+
+def CVT_f32_pred
+  : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a",
+            [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f32_u16
+  : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
+            [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f32_u32
+  : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
+            [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f32_u64
+  : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
+            [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f32_f64
+  : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
+            [(set RRegf32:$d, (fround RRegf64:$a))]>;
+
+// Conversion to f64
+
+def CVT_f64_pred
+  : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a",
+            [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f64_u16
+  : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
+            [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f64_u32
+  : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
+            [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f64_u64
+  : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
+            [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f64_f32
+  : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
+            [(set RRegf64:$d, (fextend RRegf32:$a))]>;
 
 ///===- Control Flow Instructions -----------------------------------------===//
 
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+  def BRAd
+    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+  // FIXME: The pattern part is blank because I cannot (or do not yet know
+  // how to) use the first operand of PredicateOperand (a Preds register) here
+  def BRAdp
+    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+              [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
   def RET  : InstPTX<(outs), (ins), "ret",  [(PTXret)]>;
 }
+
+///===- Intrinsic Instructions --------------------------------------------===//
+
+include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 0000000..320934a
--- /dev/null
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,84 @@
+//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+  : InstPTX<(outs RRegu64:$d), (ins),
+            !strconcat("mov.u64\t$d, %", regname),
+            [(set RRegu64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+  : InstPTX<(outs RRegu32:$d), (ins),
+            !strconcat("mov.u32\t$d, %", regname),
+            [(set RRegu32:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
+def PTX_READ_TID_X   : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W   : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
+def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID  : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
+def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
+def PTX_READ_GRIDID  : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+                           [(int_ptx_bar_sync imm:$i)]>;
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 0886ba8..1574670 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -143,9 +143,9 @@ public:
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
 
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             bool isPCRel, unsigned AddrSpace);
-  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
-  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+                             unsigned AddrSpace);
+  virtual void EmitULEB128Value(const MCExpr *Value);
+  virtual void EmitSLEB128Value(const MCExpr *Value);
   virtual void EmitGPRel32Value(const MCExpr *Value);
 
 
@@ -233,7 +233,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
 void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
+  //assert(getCurrentSection() && "Cannot emit before setting section!");
 
   OS << *Symbol << MAI.getLabelSuffix();
   EmitEOL();
@@ -352,9 +352,8 @@ void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
 }
 
 void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                     bool isPCRel, unsigned AddrSpace) {
+                                     unsigned AddrSpace) {
   assert(getCurrentSection() && "Cannot emit contents before setting section!");
-  assert(!isPCRel && "Cannot emit pc relative relocations!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
@@ -383,15 +382,13 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
   EmitEOL();
 }
 
-void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value,
-                                        unsigned AddrSpace) {
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
   assert(MAI.hasLEB128() && "Cannot print a .uleb");
   OS << ".uleb128 " << *Value;
   EmitEOL();
 }
 
-void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value,
-                                        unsigned AddrSpace) {
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
   assert(MAI.hasLEB128() && "Cannot print a .sleb");
   OS << ".sleb128 " << *Value;
   EmitEOL();
@@ -423,7 +420,8 @@ void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
   MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
 }
 
-void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+                                            int64_t Value,
                                             unsigned ValueSize,
                                             unsigned MaxBytesToEmit) {
   // Some assemblers don't support non-power of two alignments, so we always
@@ -532,7 +530,7 @@ void PTXMCAsmStreamer::Finish() {}
 namespace llvm {
   MCStreamer *createPTXAsmStreamer(MCContext &Context,
                                    formatted_raw_ostream &OS,
-                                   bool isVerboseAsm, bool useLoc,
+                                   bool isVerboseAsm, bool useLoc, bool useCFI,
                                    MCInstPrinter *IP,
                                    MCCodeEmitter *CE, TargetAsmBackend *TAB,
                                    bool ShowInst) {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b37c740..c5e1910 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(for (PTXMachineFunctionInfo::reg_iterator
              i = MFI->argRegBegin(), e = MFI->argRegEnd();
-	     i != e; ++i)
+             i != e; ++i)
         dbgs() << "Arg Reg: " << *i << "\n";);
 
   DEBUG(for (PTXMachineFunctionInfo::reg_iterator
              i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
-	     i != e; ++i)
+             i != e; ++i)
         dbgs() << "Local Var Reg: " << *i << "\n";);
 
   return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 56d044b..81df1c2 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -42,36 +42,37 @@ public:
   void setRetReg(unsigned reg) { reg_ret = reg; }
 
   void doneAddArg(void) {
-    std::sort(reg_arg.begin(), reg_arg.end());
     _isDoneAddArg = true;
   }
-  void doneAddLocalVar(void) {
-    std::sort(reg_local_var.begin(), reg_local_var.end());
-  }
+  void doneAddLocalVar(void) {}
 
   bool isDoneAddArg(void) { return _isDoneAddArg; }
 
   bool isKernel() const { return is_kernel; }
 
-  typedef std::vector<unsigned>::const_iterator reg_iterator;
+  typedef std::vector<unsigned>::const_iterator         reg_iterator;
+  typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
 
-  bool argRegEmpty() const { return reg_arg.empty(); }
-  int getNumArg() const { return reg_arg.size(); }
+  bool         argRegEmpty() const { return reg_arg.empty(); }
+  int          getNumArg() const { return reg_arg.size(); }
   reg_iterator argRegBegin() const { return reg_arg.begin(); }
   reg_iterator argRegEnd()   const { return reg_arg.end(); }
+  reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
+  reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
 
-  bool localVarRegEmpty() const { return reg_local_var.empty(); }
+  bool         localVarRegEmpty() const { return reg_local_var.empty(); }
   reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
   reg_iterator localVarRegEnd()   const { return reg_local_var.end(); }
 
   unsigned retReg() const { return reg_ret; }
 
   bool isArgReg(unsigned reg) const {
-    return std::binary_search(reg_arg.begin(), reg_arg.end(), reg);
+    return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
   }
 
   bool isLocalVarReg(unsigned reg) const {
-    return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg);
+    return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
+      != reg_local_var.end();
   }
 }; // class PTXMachineFunctionInfo
 } // namespace llvm
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 22e2b34..f616141 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> {
 //  Registers
 //===----------------------------------------------------------------------===//
 
+///===- Predicate Registers -----------------------------------------------===//
+
 def P0  : PTXReg<"p0">;
 def P1  : PTXReg<"p1">;
 def P2  : PTXReg<"p2">;
@@ -51,6 +53,108 @@ def P28 : PTXReg<"p28">;
 def P29 : PTXReg<"p29">;
 def P30 : PTXReg<"p30">;
 def P31 : PTXReg<"p31">;
+def P32 : PTXReg<"p32">;
+def P33 : PTXReg<"p33">;
+def P34 : PTXReg<"p34">;
+def P35 : PTXReg<"p35">;
+def P36 : PTXReg<"p36">;
+def P37 : PTXReg<"p37">;
+def P38 : PTXReg<"p38">;
+def P39 : PTXReg<"p39">;
+def P40 : PTXReg<"p40">;
+def P41 : PTXReg<"p41">;
+def P42 : PTXReg<"p42">;
+def P43 : PTXReg<"p43">;
+def P44 : PTXReg<"p44">;
+def P45 : PTXReg<"p45">;
+def P46 : PTXReg<"p46">;
+def P47 : PTXReg<"p47">;
+def P48 : PTXReg<"p48">;
+def P49 : PTXReg<"p49">;
+def P50 : PTXReg<"p50">;
+def P51 : PTXReg<"p51">;
+def P52 : PTXReg<"p52">;
+def P53 : PTXReg<"p53">;
+def P54 : PTXReg<"p54">;
+def P55 : PTXReg<"p55">;
+def P56 : PTXReg<"p56">;
+def P57 : PTXReg<"p57">;
+def P58 : PTXReg<"p58">;
+def P59 : PTXReg<"p59">;
+def P60 : PTXReg<"p60">;
+def P61 : PTXReg<"p61">;
+def P62 : PTXReg<"p62">;
+def P63 : PTXReg<"p63">;
+
+///===- 16-bit Integer Registers ------------------------------------------===//
+
+def RH0  : PTXReg<"rh0">;
+def RH1  : PTXReg<"rh1">;
+def RH2  : PTXReg<"rh2">;
+def RH3  : PTXReg<"rh3">;
+def RH4  : PTXReg<"rh4">;
+def RH5  : PTXReg<"rh5">;
+def RH6  : PTXReg<"rh6">;
+def RH7  : PTXReg<"rh7">;
+def RH8  : PTXReg<"rh8">;
+def RH9  : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+def RH32 : PTXReg<"rh32">;
+def RH33 : PTXReg<"rh33">;
+def RH34 : PTXReg<"rh34">;
+def RH35 : PTXReg<"rh35">;
+def RH36 : PTXReg<"rh36">;
+def RH37 : PTXReg<"rh37">;
+def RH38 : PTXReg<"rh38">;
+def RH39 : PTXReg<"rh39">;
+def RH40 : PTXReg<"rh40">;
+def RH41 : PTXReg<"rh41">;
+def RH42 : PTXReg<"rh42">;
+def RH43 : PTXReg<"rh43">;
+def RH44 : PTXReg<"rh44">;
+def RH45 : PTXReg<"rh45">;
+def RH46 : PTXReg<"rh46">;
+def RH47 : PTXReg<"rh47">;
+def RH48 : PTXReg<"rh48">;
+def RH49 : PTXReg<"rh49">;
+def RH50 : PTXReg<"rh50">;
+def RH51 : PTXReg<"rh51">;
+def RH52 : PTXReg<"rh52">;
+def RH53 : PTXReg<"rh53">;
+def RH54 : PTXReg<"rh54">;
+def RH55 : PTXReg<"rh55">;
+def RH56 : PTXReg<"rh56">;
+def RH57 : PTXReg<"rh57">;
+def RH58 : PTXReg<"rh58">;
+def RH59 : PTXReg<"rh59">;
+def RH60 : PTXReg<"rh60">;
+def RH61 : PTXReg<"rh61">;
+def RH62 : PTXReg<"rh62">;
+def RH63 : PTXReg<"rh63">;
+
+
+///===- 32-bit Integer Registers ------------------------------------------===//
 
 def R0  : PTXReg<"r0">;
 def R1  : PTXReg<"r1">;
@@ -84,6 +188,243 @@ def R28 : PTXReg<"r28">;
 def R29 : PTXReg<"r29">;
 def R30 : PTXReg<"r30">;
 def R31 : PTXReg<"r31">;
+def R32 : PTXReg<"r32">;
+def R33 : PTXReg<"r33">;
+def R34 : PTXReg<"r34">;
+def R35 : PTXReg<"r35">;
+def R36 : PTXReg<"r36">;
+def R37 : PTXReg<"r37">;
+def R38 : PTXReg<"r38">;
+def R39 : PTXReg<"r39">;
+def R40 : PTXReg<"r40">;
+def R41 : PTXReg<"r41">;
+def R42 : PTXReg<"r42">;
+def R43 : PTXReg<"r43">;
+def R44 : PTXReg<"r44">;
+def R45 : PTXReg<"r45">;
+def R46 : PTXReg<"r46">;
+def R47 : PTXReg<"r47">;
+def R48 : PTXReg<"r48">;
+def R49 : PTXReg<"r49">;
+def R50 : PTXReg<"r50">;
+def R51 : PTXReg<"r51">;
+def R52 : PTXReg<"r52">;
+def R53 : PTXReg<"r53">;
+def R54 : PTXReg<"r54">;
+def R55 : PTXReg<"r55">;
+def R56 : PTXReg<"r56">;
+def R57 : PTXReg<"r57">;
+def R58 : PTXReg<"r58">;
+def R59 : PTXReg<"r59">;
+def R60 : PTXReg<"r60">;
+def R61 : PTXReg<"r61">;
+def R62 : PTXReg<"r62">;
+def R63 : PTXReg<"r63">;
+
+
+///===- 64-bit Integer Registers ------------------------------------------===//
+
+def RD0  : PTXReg<"rd0">;
+def RD1  : PTXReg<"rd1">;
+def RD2  : PTXReg<"rd2">;
+def RD3  : PTXReg<"rd3">;
+def RD4  : PTXReg<"rd4">;
+def RD5  : PTXReg<"rd5">;
+def RD6  : PTXReg<"rd6">;
+def RD7  : PTXReg<"rd7">;
+def RD8  : PTXReg<"rd8">;
+def RD9  : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+def RD32 : PTXReg<"rd32">;
+def RD33 : PTXReg<"rd33">;
+def RD34 : PTXReg<"rd34">;
+def RD35 : PTXReg<"rd35">;
+def RD36 : PTXReg<"rd36">;
+def RD37 : PTXReg<"rd37">;
+def RD38 : PTXReg<"rd38">;
+def RD39 : PTXReg<"rd39">;
+def RD40 : PTXReg<"rd40">;
+def RD41 : PTXReg<"rd41">;
+def RD42 : PTXReg<"rd42">;
+def RD43 : PTXReg<"rd43">;
+def RD44 : PTXReg<"rd44">;
+def RD45 : PTXReg<"rd45">;
+def RD46 : PTXReg<"rd46">;
+def RD47 : PTXReg<"rd47">;
+def RD48 : PTXReg<"rd48">;
+def RD49 : PTXReg<"rd49">;
+def RD50 : PTXReg<"rd50">;
+def RD51 : PTXReg<"rd51">;
+def RD52 : PTXReg<"rd52">;
+def RD53 : PTXReg<"rd53">;
+def RD54 : PTXReg<"rd54">;
+def RD55 : PTXReg<"rd55">;
+def RD56 : PTXReg<"rd56">;
+def RD57 : PTXReg<"rd57">;
+def RD58 : PTXReg<"rd58">;
+def RD59 : PTXReg<"rd59">;
+def RD60 : PTXReg<"rd60">;
+def RD61 : PTXReg<"rd61">;
+def RD62 : PTXReg<"rd62">;
+def RD63 : PTXReg<"rd63">;
+
+
+///===- 32-bit Floating-Point Registers -----------------------------------===//
+
+def F0  : PTXReg<"f0">;
+def F1  : PTXReg<"f1">;
+def F2  : PTXReg<"f2">;
+def F3  : PTXReg<"f3">;
+def F4  : PTXReg<"f4">;
+def F5  : PTXReg<"f5">;
+def F6  : PTXReg<"f6">;
+def F7  : PTXReg<"f7">;
+def F8  : PTXReg<"f8">;
+def F9  : PTXReg<"f9">;
+def F10 : PTXReg<"f10">;
+def F11 : PTXReg<"f11">;
+def F12 : PTXReg<"f12">;
+def F13 : PTXReg<"f13">;
+def F14 : PTXReg<"f14">;
+def F15 : PTXReg<"f15">;
+def F16 : PTXReg<"f16">;
+def F17 : PTXReg<"f17">;
+def F18 : PTXReg<"f18">;
+def F19 : PTXReg<"f19">;
+def F20 : PTXReg<"f20">;
+def F21 : PTXReg<"f21">;
+def F22 : PTXReg<"f22">;
+def F23 : PTXReg<"f23">;
+def F24 : PTXReg<"f24">;
+def F25 : PTXReg<"f25">;
+def F26 : PTXReg<"f26">;
+def F27 : PTXReg<"f27">;
+def F28 : PTXReg<"f28">;
+def F29 : PTXReg<"f29">;
+def F30 : PTXReg<"f30">;
+def F31 : PTXReg<"f31">;
+def F32 : PTXReg<"f32">;
+def F33 : PTXReg<"f33">;
+def F34 : PTXReg<"f34">;
+def F35 : PTXReg<"f35">;
+def F36 : PTXReg<"f36">;
+def F37 : PTXReg<"f37">;
+def F38 : PTXReg<"f38">;
+def F39 : PTXReg<"f39">;
+def F40 : PTXReg<"f40">;
+def F41 : PTXReg<"f41">;
+def F42 : PTXReg<"f42">;
+def F43 : PTXReg<"f43">;
+def F44 : PTXReg<"f44">;
+def F45 : PTXReg<"f45">;
+def F46 : PTXReg<"f46">;
+def F47 : PTXReg<"f47">;
+def F48 : PTXReg<"f48">;
+def F49 : PTXReg<"f49">;
+def F50 : PTXReg<"f50">;
+def F51 : PTXReg<"f51">;
+def F52 : PTXReg<"f52">;
+def F53 : PTXReg<"f53">;
+def F54 : PTXReg<"f54">;
+def F55 : PTXReg<"f55">;
+def F56 : PTXReg<"f56">;
+def F57 : PTXReg<"f57">;
+def F58 : PTXReg<"f58">;
+def F59 : PTXReg<"f59">;
+def F60 : PTXReg<"f60">;
+def F61 : PTXReg<"f61">;
+def F62 : PTXReg<"f62">;
+def F63 : PTXReg<"f63">;
+
+
+///===- 64-bit Floating-Point Registers -----------------------------------===//
+
+def FD0  : PTXReg<"fd0">;
+def FD1  : PTXReg<"fd1">;
+def FD2  : PTXReg<"fd2">;
+def FD3  : PTXReg<"fd3">;
+def FD4  : PTXReg<"fd4">;
+def FD5  : PTXReg<"fd5">;
+def FD6  : PTXReg<"fd6">;
+def FD7  : PTXReg<"fd7">;
+def FD8  : PTXReg<"fd8">;
+def FD9  : PTXReg<"fd9">;
+def FD10 : PTXReg<"fd10">;
+def FD11 : PTXReg<"fd11">;
+def FD12 : PTXReg<"fd12">;
+def FD13 : PTXReg<"fd13">;
+def FD14 : PTXReg<"fd14">;
+def FD15 : PTXReg<"fd15">;
+def FD16 : PTXReg<"fd16">;
+def FD17 : PTXReg<"fd17">;
+def FD18 : PTXReg<"fd18">;
+def FD19 : PTXReg<"fd19">;
+def FD20 : PTXReg<"fd20">;
+def FD21 : PTXReg<"fd21">;
+def FD22 : PTXReg<"fd22">;
+def FD23 : PTXReg<"fd23">;
+def FD24 : PTXReg<"fd24">;
+def FD25 : PTXReg<"fd25">;
+def FD26 : PTXReg<"fd26">;
+def FD27 : PTXReg<"fd27">;
+def FD28 : PTXReg<"fd28">;
+def FD29 : PTXReg<"fd29">;
+def FD30 : PTXReg<"fd30">;
+def FD31 : PTXReg<"fd31">;
+def FD32 : PTXReg<"fd32">;
+def FD33 : PTXReg<"fd33">;
+def FD34 : PTXReg<"fd34">;
+def FD35 : PTXReg<"fd35">;
+def FD36 : PTXReg<"fd36">;
+def FD37 : PTXReg<"fd37">;
+def FD38 : PTXReg<"fd38">;
+def FD39 : PTXReg<"fd39">;
+def FD40 : PTXReg<"fd40">;
+def FD41 : PTXReg<"fd41">;
+def FD42 : PTXReg<"fd42">;
+def FD43 : PTXReg<"fd43">;
+def FD44 : PTXReg<"fd44">;
+def FD45 : PTXReg<"fd45">;
+def FD46 : PTXReg<"f4d6">;
+def FD47 : PTXReg<"fd47">;
+def FD48 : PTXReg<"fd48">;
+def FD49 : PTXReg<"fd49">;
+def FD50 : PTXReg<"fd50">;
+def FD51 : PTXReg<"fd51">;
+def FD52 : PTXReg<"fd52">;
+def FD53 : PTXReg<"fd53">;
+def FD54 : PTXReg<"fd54">;
+def FD55 : PTXReg<"fd55">;
+def FD56 : PTXReg<"fd56">;
+def FD57 : PTXReg<"fd57">;
+def FD58 : PTXReg<"fd58">;
+def FD59 : PTXReg<"fd59">;
+def FD60 : PTXReg<"fd60">;
+def FD61 : PTXReg<"fd61">;
+def FD62 : PTXReg<"fd62">;
+def FD63 : PTXReg<"fd63">;
+
 
 //===----------------------------------------------------------------------===//
 //  Register classes
@@ -93,10 +434,58 @@ def Preds : RegisterClass<"PTX", [i1], 8,
                           [P0, P1, P2, P3, P4, P5, P6, P7,
                            P8, P9, P10, P11, P12, P13, P14, P15,
                            P16, P17, P18, P19, P20, P21, P22, P23,
-                           P24, P25, P26, P27, P28, P29, P30, P31]>;
+                           P24, P25, P26, P27, P28, P29, P30, P31,
+                           P32, P33, P34, P35, P36, P37, P38, P39,
+                           P40, P41, P42, P43, P44, P45, P46, P47,
+                           P48, P49, P50, P51, P52, P53, P54, P55,
+                           P56, P57, P58, P59, P60, P61, P62, P63]>;
 
-def RRegs32 : RegisterClass<"PTX", [i32], 32,
+def RRegu16 : RegisterClass<"PTX", [i16], 16,
+                            [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
+                             RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
+                             RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
+                             RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31,
+                             RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
+                             RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47,
+                             RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55,
+                             RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>;
+
+def RRegu32 : RegisterClass<"PTX", [i32], 32,
                             [R0, R1, R2, R3, R4, R5, R6, R7,
                              R8, R9, R10, R11, R12, R13, R14, R15,
                              R16, R17, R18, R19, R20, R21, R22, R23,
-                             R24, R25, R26, R27, R28, R29, R30, R31]>;
+                             R24, R25, R26, R27, R28, R29, R30, R31,
+                             R32, R33, R34, R35, R36, R37, R38, R39,
+                             R40, R41, R42, R43, R44, R45, R46, R47,
+                             R48, R49, R50, R51, R52, R53, R54, R55,
+                             R56, R57, R58, R59, R60, R61, R62, R63]>;
+
+def RRegu64 : RegisterClass<"PTX", [i64], 64,
+                            [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
+                             RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
+                             RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
+                             RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31,
+                             RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
+                             RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47,
+                             RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55,
+                             RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>;
+
+def RRegf32 : RegisterClass<"PTX", [f32], 32,
+                            [F0, F1, F2, F3, F4, F5, F6, F7,
+                             F8, F9, F10, F11, F12, F13, F14, F15,
+                             F16, F17, F18, F19, F20, F21, F22, F23,
+                             F24, F25, F26, F27, F28, F29, F30, F31,
+                             F32, F33, F34, F35, F36, F37, F38, F39,
+                             F40, F41, F42, F43, F44, F45, F46, F47,
+                             F48, F49, F50, F51, F52, F53, F54, F55,
+                             F56, F57, F58, F59, F60, F61, F62, F63]>;
+
+def RRegf64 : RegisterClass<"PTX", [f64], 64,
+                            [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
+                             FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
+                             FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
+                             FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31,
+                             FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39,
+                             FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47,
+                             FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55,
+                             FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 00e2c88..a224f2b 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -12,12 +12,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
-  std::string TARGET = "sm_20";
-  // TODO: call ParseSubtargetFeatures(FS, TARGET);
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
+                           bool is64Bit)
+  : PTXShaderModel(PTX_SM_1_0),
+    PTXVersion(PTX_VERSION_2_0),
+    SupportsDouble(false),
+    Is64Bit(is64Bit) {
+  std::string TARGET = "generic";
+  ParseSubtargetFeatures(FS, TARGET);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+  switch(PTXShaderModel) {
+    default: llvm_unreachable("Unknown shader model");
+    case PTX_SM_1_0: return "sm_10";
+    case PTX_SM_1_3: return "sm_13";
+    case PTX_SM_2_0: return "sm_20";
+  }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+  switch(PTXVersion) {
+    default: llvm_unreachable("Unknown PTX version");
+    case PTX_VERSION_2_0: return "2.0";
+    case PTX_VERSION_2_1: return "2.1";
+    case PTX_VERSION_2_2: return "2.2";
+  }
 }
 
 #include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 7fd85f8..47d9842 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -19,10 +19,57 @@
 namespace llvm {
   class PTXSubtarget : public TargetSubtarget {
     private:
-      bool is_sm20;
+
+      /**
+       * Enumeration of Shader Models supported by the back-end.
+       */
+      enum PTXShaderModelEnum {
+        PTX_SM_1_0, /*< Shader Model 1.0 */
+        PTX_SM_1_3, /*< Shader Model 1.3 */
+        PTX_SM_2_0  /*< Shader Model 2.0 */
+      };
+
+      /**
+       * Enumeration of PTX versions supported by the back-end.
+       *
+       * Currently, PTX 2.0 is the minimum supported version.
+       */
+      enum PTXVersionEnum {
+        PTX_VERSION_2_0,  /*< PTX Version 2.0 */
+        PTX_VERSION_2_1,  /*< PTX Version 2.1 */
+        PTX_VERSION_2_2   /*< PTX Version 2.2 */
+      };
+
+      /// Shader Model supported on the target GPU.
+      PTXShaderModelEnum PTXShaderModel;
+
+      /// PTX Language Version.
+      PTXVersionEnum PTXVersion;
+
+      // The native .f64 type is supported on the hardware.
+      bool SupportsDouble;
+
+      // Use .u64 instead of .u32 for addresses.
+      bool Is64Bit;
 
     public:
-      PTXSubtarget(const std::string &TT, const std::string &FS);
+      PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+
+      std::string getTargetString() const;
+
+      std::string getPTXVersionString() const;
+
+      bool supportsDouble() const { return SupportsDouble; }
+
+      bool is64Bit() const { return Is64Bit; }
+
+      bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
+
+      bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
+
+      bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+      bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
 
       std::string ParseSubtargetFeatures(const std::string &FS,
                                          const std::string &CPU);
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index b263813..1b737c9 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -16,12 +16,14 @@
 #include "PTXTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 namespace llvm {
   MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                    bool isVerboseAsm, bool useLoc,
+                                   bool useCFI,
                                    MCInstPrinter *InstPrint,
                                    MCCodeEmitter *CE,
                                    TargetAsmBackend *TAB,
@@ -29,21 +31,47 @@ namespace llvm {
 }
 
 extern "C" void LLVMInitializePTXTarget() {
-  RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
-  RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
-  TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
+
+  RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+  RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+  RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target);
+  RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target);
+
+  TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+  TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+  const char* DataLayout32 =
+    "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+  const char* DataLayout64 =
+    "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
 }
 
 // DataLayout and FrameLowering are filled with dummy data
 PTXTargetMachine::PTXTargetMachine(const Target &T,
                                    const std::string &TT,
-                                   const std::string &FS)
+                                   const std::string &FS,
+                                   bool is64Bit)
   : LLVMTargetMachine(T, TT),
-    DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"),
+    DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+    Subtarget(TT, FS, is64Bit),
     FrameLowering(Subtarget),
     InstrInfo(*this),
-    TLInfo(*this),
-    Subtarget(TT, FS) {
+    TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T,
+                                       const std::string& TT,
+                                       const std::string& FS)
+  : PTXTargetMachine(T, TT, FS, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T,
+                                       const std::string& TT,
+                                       const std::string& FS)
+  : PTXTargetMachine(T, TT, FS, true) {
 }
 
 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 728e36f..149be8e 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -25,15 +25,15 @@
 namespace llvm {
 class PTXTargetMachine : public LLVMTargetMachine {
   private:
-    const TargetData DataLayout;
-    PTXFrameLowering FrameLowering;
-    PTXInstrInfo InstrInfo;
+    const TargetData  DataLayout;
+    PTXSubtarget      Subtarget; // has to be initialized before FrameLowering
+    PTXFrameLowering  FrameLowering;
+    PTXInstrInfo      InstrInfo;
     PTXTargetLowering TLInfo;
-    PTXSubtarget Subtarget;
 
   public:
     PTXTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &FS, bool is64Bit);
 
     virtual const TargetData *getTargetData() const { return &DataLayout; }
 
@@ -55,6 +55,22 @@ class PTXTargetMachine : public LLVMTargetMachine {
     virtual bool addPostRegAlloc(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
 }; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+  PTX32TargetMachine(const Target &T, const std::string &TT,
+                     const std::string& FS);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+  PTX64TargetMachine(const Target &T, const std::string &TT,
+                     const std::string& FS);
+}; // class PTX32TargetMachine
+
 } // namespace llvm
 
 #endif // PTX_TARGET_MACHINE_H
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index a577d77..9df6c75 100644
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -13,9 +13,13 @@
 
 using namespace llvm;
 
-Target llvm::ThePTXTarget;
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
 
 extern "C" void LLVMInitializePTXTargetInfo() {
   // see llvm/ADT/Triple.h
-  RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
+  RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+                                    "PTX (32-bit) [Experimental]");
+  RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+                                    "PTX (64-bit) [Experimental]");
 }
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index ebc10da..9cf9db9 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -17,13 +17,16 @@
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
-  class MCOperand;
+
+class MCOperand;
+class TargetMachine;
 
 class PPCInstPrinter : public MCInstPrinter {
   // 0 -> AIX, 1 -> Darwin.
   unsigned SyntaxVariant;
 public:
-  PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+  PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI,
+                 unsigned syntaxVariant)
     : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
   
   bool isDarwinSyntax() const {
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp
index c4d4ac9..f562a3f 100644
--- a/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -110,10 +110,8 @@ namespace {
 
 TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
                                             const std::string &TT) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  if (Triple(TT).isOSDarwin())
     return new DarwinPPCAsmBackend(T);
-  default:
-    return 0;
-  }
+
+  return 0;
 }
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8ed5d7f..09a9be9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -680,9 +680,10 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
 }
 
 static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
-  return new PPCInstPrinter(MAI, SyntaxVariant);
+  return new PPCInstPrinter(TM, MAI, SyntaxVariant);
 }
 
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 70d00e4..128522c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -899,7 +899,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     short Imm;
     if (isIntS16Immediate(CN, Imm)) {
       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
-      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+                             CN->getValueType(0));
       return true;
     }
 
@@ -947,7 +948,8 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
   }
 
   // Otherwise, do it the hard way, using R0 as the base register.
-  Base = DAG.getRegister(PPC::R0, N.getValueType());
+  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+                         N.getValueType());
   Index = N;
   return true;
 }
@@ -2153,7 +2155,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
 }
 
 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
-/// adjusted to accomodate the arguments for the tailcall.
+/// adjusted to accommodate the arguments for the tailcall.
 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                    unsigned ParamSize) {
 
@@ -2394,7 +2396,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
   // might overwrite each other in case of tail call optimization.
   SmallVector<SDValue, 8> MemOpChains2;
-  // Do not flag preceeding copytoreg stuff together with the following stuff.
+  // Do not flag preceding copytoreg stuff together with the following stuff.
   InFlag = SDValue();
   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                     MemOpChains2, dl);
@@ -2442,7 +2444,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
       unsigned OpFlags = 0;
       if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-          PPCSubTarget.getDarwinVers() < 9 &&
+          (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+           PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
           (G->getGlobal()->isDeclaration() ||
            G->getGlobal()->isWeakForLinker())) {
         // PC-relative references to external symbols should go through $stub,
@@ -2465,7 +2468,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     unsigned char OpFlags = 0;
 
     if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-        PPCSubTarget.getDarwinVers() < 9) {
+        (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+         PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -4571,6 +4575,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   // registers without caring whether they're 32 or 64, but here we're
   // doing actual arithmetic on the addresses.
   bool is64bit = PPCSubTarget.isPPC64();
+  unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
@@ -4634,8 +4639,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   //   bne- loopMBB
   //   fallthrough --> exitMBB
   //   srw dest, tmpDest, shift
-
-  if (ptrA!=PPC::R0) {
+  if (ptrA != ZeroReg) {
     Ptr1Reg = RegInfo.createVirtualRegister(RC);
     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
       .addReg(ptrA).addReg(ptrB);
@@ -4665,7 +4669,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
 
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
-    .addReg(PPC::R0).addReg(PtrReg);
+    .addReg(ZeroReg).addReg(PtrReg);
   if (BinOpcode)
     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
       .addReg(Incr2Reg).addReg(TmpDestReg);
@@ -4676,7 +4680,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
   BuildMI(BB, dl, TII->get(PPC::STWCX))
-    .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
+    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
   BuildMI(BB, dl, TII->get(PPC::BCC))
     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
   BB->addSuccessor(loopMBB);
@@ -4685,7 +4689,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   //  exitMBB:
   //   ...
   BB = exitMBB;
-  BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
+  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+    .addReg(ShiftReg);
   return BB;
 }
 
@@ -4933,6 +4938,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
     unsigned Ptr1Reg;
     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+    unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
     //  thisMBB:
     //   ...
     //   fallthrough --> loopMBB
@@ -4965,7 +4971,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     //   stwcx. tmpDest, ptr
     // exitBB:
     //   srw dest, tmpDest, shift
-    if (ptrA!=PPC::R0) {
+    if (ptrA != ZeroReg) {
       Ptr1Reg = RegInfo.createVirtualRegister(RC);
       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
         .addReg(ptrA).addReg(ptrB);
@@ -5002,7 +5008,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
     BB = loop1MBB;
     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
-        .addReg(PPC::R0).addReg(PtrReg);
+        .addReg(ZeroReg).addReg(PtrReg);
     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
         .addReg(TmpDestReg).addReg(MaskReg);
     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
@@ -5018,7 +5024,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
         .addReg(Tmp2Reg).addReg(NewVal3Reg);
     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
-        .addReg(PPC::R0).addReg(PtrReg);
+        .addReg(ZeroReg).addReg(PtrReg);
     BuildMI(BB, dl, TII->get(PPC::BCC))
       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
@@ -5027,13 +5033,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
     BB = midMBB;
     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
-      .addReg(PPC::R0).addReg(PtrReg);
+      .addReg(ZeroReg).addReg(PtrReg);
     BB->addSuccessor(exitMBB);
 
     //  exitMBB:
     //   ...
     BB = exitMBB;
-    BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
+    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+      .addReg(ShiftReg);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 6636b69..9f0fae5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -130,7 +130,7 @@ def : Pat<(PPCnop),
 
 // Atomic operations
 let usesCustomInserter = 1 in {
-  let Uses = [CR0] in {
+  let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
       (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 82aadeb..24071b7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -550,7 +550,7 @@ def DCBZL  : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
 
 // Atomic operations
 let usesCustomInserter = 1 in {
-  let Uses = [CR0] in {
+  let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
       (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
index d1178dd..9e508cc 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
 PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
   PCSymbol = ".";
   CommentString = ";";
-  ExceptionsType = ExceptionHandling::DwarfTable;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
 
   if (!is64Bit)
     Data64bitsDirective = 0;      // We can't emit a 64-bit unit in PPC32 mode.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 72a1dee..5f3aa23 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -70,7 +70,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
   , HasSTFIWX(false)
   , HasLazyResolverStubs(false)
   , IsJITCodeModel(false)
-  , DarwinVers(0) {
+  , TargetTriple(TT) {
 
   // Determine default and user specified characteristics
   std::string CPU = "generic";
@@ -92,19 +92,6 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
   // support it, ignore.
   if (use64BitRegs() && !has64BitSupport())
     Use64BitRegs = false;
-  
-  // Set the boolean corresponding to the current target triple, or the default
-  // if one cannot be determined, to true.
-  if (TT.length() > 7) {
-    // Determine which version of darwin this is.
-    size_t DarwinPos = TT.find("-darwin");
-    if (DarwinPos != std::string::npos) {
-      if (isdigit(TT[DarwinPos+7]))
-        DarwinVers = atoi(&TT[DarwinPos+7]);
-      else
-        DarwinVers = 8;  // Minimum supported darwin is Tiger.
-    }
-  }
 
   // Set up darwin-specific properties.
   if (isDarwin())
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 00ec747..8fd1a44 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,6 +14,7 @@
 #ifndef POWERPCSUBTARGET_H
 #define POWERPCSUBTARGET_H
 
+#include "llvm/ADT/Triple.h"
 #include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetSubtarget.h"
 
@@ -65,9 +66,9 @@ protected:
   bool HasLazyResolverStubs;
   bool IsJITCodeModel;
   
-  /// DarwinVers - Nonzero if this is a darwin platform.  Otherwise, the numeric
-  /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
-  unsigned char DarwinVers; // Is any darwin-ppc platform.
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
+
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
@@ -134,13 +135,10 @@ public:
   bool hasAltivec() const { return HasAltivec; }
   bool isGigaProcessor() const { return IsGigaProcessor; }
 
-  /// isDarwin - True if this is any darwin platform.
-  bool isDarwin() const { return DarwinVers != 0; }
-  /// isDarwin - True if this is darwin9 (leopard, 10.5) or above.
-  bool isDarwin9() const { return DarwinVers >= 9; }
+  const Triple &getTargetTriple() const { return TargetTriple; }
 
-  /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
-  unsigned getDarwinVers() const { return DarwinVers; }
+  /// isDarwin - True if this is any darwin platform.
+  bool isDarwin() const { return TargetTriple.isMacOSX(); }
 
   bool isDarwinABI() const { return isDarwin(); }
   bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 212b450..d27e54e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
-  if (TheTriple.getOS() == Triple::Darwin)
+  if (TheTriple.isOSDarwin())
     return new PPCMCAsmInfoDarwin(isPPC64);
   return new PPCLinuxMCAsmInfo(isPPC64);
   
@@ -37,12 +37,10 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCCodeEmitter *Emitter,
                                     bool RelaxAll,
                                     bool NoExecStack) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  if (Triple(TT).isOSDarwin())
     return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
-  default:
-    return NULL;
-  }
+
+  return NULL;
 }
 
 extern "C" void LLVMInitializePowerPCTarget() {
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f85914b..ffe3fa4 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -392,34 +392,6 @@ PHI Slicing could be extended to do this.
 
 //===---------------------------------------------------------------------===//
 
-LSR should know what GPR types a target has from TargetData.  This code:
-
-volatile short X, Y; // globals
-
-void foo(int N) {
-  int i;
-  for (i = 0; i < N; i++) { X = i; Y = i*4; }
-}
-
-produces two near identical IV's (after promotion) on PPC/ARM:
-
-LBB1_2:
-	ldr r3, LCPI1_0
-	ldr r3, [r3]
-	strh r2, [r3]
-	ldr r3, LCPI1_1
-	ldr r3, [r3]
-	strh r1, [r3]
-	add r1, r1, #4
-	add r2, r2, #1   <- [0,+,1]
-	sub r0, r0, #1   <- [0,-,1]
-	cmp r0, #0
-	bne LBB1_2
-
-LSR should reuse the "+" IV for the exit test.
-
-//===---------------------------------------------------------------------===//
-
 Tail call elim should be more aggressive, checking to see if the call is
 followed by an uncond branch to an exit block.
 
@@ -1325,6 +1297,21 @@ codegen.
 
 //===---------------------------------------------------------------------===//
 
+simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917)
+
+char buf1[6], buf2[6], buf3[4], buf4[4];
+int i;
+
+int foo (void) {
+  int ret = snprintf (buf1, sizeof buf1, "abcde");
+  ret += snprintf (buf2, sizeof buf2, "abcdef") * 16;
+  ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? "abc" : "def") * 256;
+  ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096;
+  return ret;
+}
+
+//===---------------------------------------------------------------------===//
+
 "gas" uses this idiom:
   else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string))
 ..
@@ -1780,43 +1767,6 @@ case it choses instead to keep the max operation obvious.
 
 //===---------------------------------------------------------------------===//
 
-Take the following testcase on x86-64 (similar testcases exist for all targets
-with addc/adde):
-
-define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
-i64 %c) nounwind {
-entry:
- %0 = zext i64 %a to i128                        ; <i128> [#uses=1]
- %1 = zext i64 %b to i128                        ; <i128> [#uses=1]
- %2 = add i128 %1, %0                            ; <i128> [#uses=2]
- %3 = zext i64 %c to i128                        ; <i128> [#uses=1]
- %4 = shl i128 %3, 64                            ; <i128> [#uses=1]
- %5 = add i128 %4, %2                            ; <i128> [#uses=1]
- %6 = lshr i128 %5, 64                           ; <i128> [#uses=1]
- %7 = trunc i128 %6 to i64                       ; <i64> [#uses=1]
- store i64 %7, i64* %s, align 8
- %8 = trunc i128 %2 to i64                       ; <i64> [#uses=1]
- store i64 %8, i64* %t, align 8
- ret void
-}
-
-Generated code:
-        addq	%rcx, %rdx
-        sbbq	%rax, %rax
-        subq	%rax, %r8
-        movq	%r8, (%rdi)
-        movq	%rdx, (%rsi)
-        ret
-
-Expected code:
-       addq    %rcx, %rdx
-       adcq    $0, %r8
-       movq    %r8, (%rdi)
-       movq    %rdx, (%rsi)
-       ret
-
-//===---------------------------------------------------------------------===//
-
 Switch lowering generates less than ideal code for the following switch:
 define void @a(i32 %x) nounwind {
 entry:
@@ -2124,11 +2074,12 @@ for.end:                                          ; preds = %entry
 }
 
 This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold
-the two memset's together. The issue with %n seems to stem from poor handling
-of the original loop.
+the two memset's together.
 
-To simplify this, we need SCEV to know that "n != 0" because of the dominating
-conditional.  That would turn the second memset into a simple memset of 'n'.
+The issue with the addition only occurs in 64-bit mode, and appears to be at
+least partially caused by Scalar Evolution not keeping its cache updated: it
+returns the "wrong" result immediately after indvars runs, but figures out the
+expected result if it is run from scratch on IR resulting from running indvars.
 
 //===---------------------------------------------------------------------===//
 
@@ -2287,4 +2238,71 @@ missed cases:
 
 //===---------------------------------------------------------------------===//
 
+define i1 @test1(i32 %x) nounwind {
+  %and = and i32 %x, 3
+  %cmp = icmp ult i32 %and, 2
+  ret i1 %cmp
+}
+
+Can be folded to (x & 2) == 0.
+
+define i1 @test2(i32 %x) nounwind {
+  %and = and i32 %x, 3
+  %cmp = icmp ugt i32 %and, 1
+  ret i1 %cmp
+}
+
+Can be folded to (x & 2) != 0.
+
+SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the
+icmp transform.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:1;
+int f4:29;
+} t1;
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:30;
+} t2;
+
+t1 s1;
+t2 s2;
+
+void func1(void)
+{
+s1.f1 = s2.f1;
+s1.f2 = s2.f2;
+}
+
+Compiles into this IR (on x86-64 at least):
+
+%struct.t1 = type { i8, [3 x i8] }
+@s2 = global %struct.t1 zeroinitializer, align 4
+@s1 = global %struct.t1 zeroinitializer, align 4
+define void @func1() nounwind ssp noredzone {
+entry:
+  %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4
+  %bf.val.sext5 = and i32 %0, 1
+  %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4
+  %2 = and i32 %1, -4
+  %3 = or i32 %2, %bf.val.sext5
+  %bf.val.sext26 = and i32 %0, 2
+  %4 = or i32 %3, %bf.val.sext26
+  store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4
+  ret void
+}
+
+The two or/and's should be merged into one each.
+
+//===---------------------------------------------------------------------===//
+
 
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 70574c3..edb62fa 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -544,7 +544,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag in necessary since all emitted instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 3cf95b5..e0a9de8 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -211,7 +211,7 @@ const std::string & SubtargetFeatures::getCPU() const {
 /// feature, set it.
 ///
 static
-void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
                     const SubtargetFeatureKV *FeatureTable,
                     size_t FeatureTableSize) {
   for (size_t i = 0; i < FeatureTableSize; ++i) {
@@ -230,7 +230,7 @@ void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
 /// feature, clear it.
 /// 
 static
-void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
                       const SubtargetFeatureKV *FeatureTable,
                       size_t FeatureTableSize) {
   for (size_t i = 0; i < FeatureTableSize; ++i) {
@@ -247,7 +247,7 @@ void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
 
 /// getBits - Get feature bits.
 ///
-uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
+uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
                                           size_t CPUTableSize,
                                     const SubtargetFeatureKV *FeatureTable,
                                           size_t FeatureTableSize) {
@@ -263,7 +263,7 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
           "CPU features table is not sorted");
   }
 #endif
-  uint32_t Bits = 0;                    // Resulting bits
+  uint64_t Bits = 0;                    // Resulting bits
 
   // Check if help is needed
   if (Features[0] == "help")
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 90939c3..d331614 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -451,7 +451,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token chain and
   // flag operands which copy the outgoing args into registers.  The InFlag in
-  // necessary since all emited instructions must be stuck together.
+  // necessary since all emitted instructions must be stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index c628df0..1990bc7 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -617,10 +617,14 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
 unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
   const Type *ElemType = GV->getType()->getElementType();
   unsigned Alignment = getPrefTypeAlignment(ElemType);
-  if (GV->getAlignment() > Alignment)
-    Alignment = GV->getAlignment();
+  unsigned GVAlignment = GV->getAlignment();
+  if (GVAlignment >= Alignment) {
+    Alignment = GVAlignment;
+  } else if (GVAlignment != 0) {
+    Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+  }
 
-  if (GV->hasInitializer()) {
+  if (GV->hasInitializer() && GVAlignment == 0) {
     if (Alignment < 16) {
       // If the global is not external, see if it is large.  If so, give it a
       // larger alignment.
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 97f3bf6..d4b7697 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -149,10 +149,10 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
 
 /// Measure the specified inline asm to determine an approximation of its
 /// length.
-/// Comments (which run till the next SeparatorChar or newline) do not
+/// Comments (which run till the next SeparatorString or newline) do not
 /// count as an instruction.
 /// Any other non-whitespace text is considered an instruction, with
-/// multiple instructions separated by SeparatorChar or newlines.
+/// multiple instructions separated by SeparatorString or newlines.
 /// Variable-length instructions are not handled here; this function
 /// may be overloaded in the target code to do that.
 unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
@@ -163,7 +163,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
   bool atInsnStart = true;
   unsigned Length = 0;
   for (; *Str; ++Str) {
-    if (*Str == '\n' || *Str == MAI.getSeparatorChar())
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
       atInsnStart = true;
     if (atInsnStart && !std::isspace(*Str)) {
       Length += MAI.getMaxInstLength();
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index c8bed18..e336b09 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -28,9 +28,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
 
   
   // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
-  if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9)
+  if (T.isMacOSX()) {
+    if (T.isMacOSXVersionLT(10, 5))
+      TLI.setUnavailable(LibFunc::memset_pattern16);
+  } else if (T.getOS() == Triple::IOS) {
+    if (T.isOSVersionLT(3, 0))
+      TLI.setUnavailable(LibFunc::memset_pattern16);
+  } else {
     TLI.setUnavailable(LibFunc::memset_pattern16);
-  
+  }
+
+  // iprintf and friends are only available on XCore.
+  if (T.getArch() != Triple::xcore) {
+    TLI.setUnavailable(LibFunc::iprintf);
+    TLI.setUnavailable(LibFunc::siprintf);
+    TLI.setUnavailable(LibFunc::fiprintf);
+  }
 }
 
 
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 5d34c7d..717ad41 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -120,6 +120,18 @@ static bool IsNullTerminatedString(const Constant *C) {
   return false;
 }
 
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                        MachineModuleInfo *MMI) const {
+  return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+                                                    const TargetMachine &TM,
+                                                    const MCSymbol *Sym) const {
+}
+
+
 /// getKindForGlobal - This is a top-level target-independent classifier for
 /// a global variable.  Given an global variable and information from TM, it
 /// classifies the global in a variety of ways that make various target
@@ -305,16 +317,15 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                MachineModuleInfo *MMI, unsigned Encoding,
                                MCStreamer &Streamer) const {
   const MCSymbol *Sym = Mang->getSymbol(GV);
-  return getExprForDwarfReference(Sym, Mang, MMI, Encoding, Streamer);
+  return getExprForDwarfReference(Sym, Encoding, Streamer);
 }
 
 const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
-                         MachineModuleInfo *MMI, unsigned Encoding,
+getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
                          MCStreamer &Streamer) const {
   const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
 
-  switch (Encoding & 0xF0) {
+  switch (Encoding & 0x70) {
   default:
     report_fatal_error("We do not support this DWARF encoding yet!");
   case dwarf::DW_EH_PE_absptr:
@@ -339,7 +350,7 @@ unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
   return dwarf::DW_EH_PE_absptr;
 }
 
-unsigned TargetLoweringObjectFile::getFDEEncoding() const {
+unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
   return dwarf::DW_EH_PE_absptr;
 }
 
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index d579d95..76ccc09 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -48,6 +48,7 @@ namespace llvm {
   bool RealignStack;
   bool DisableJumpTables;
   bool StrongPHIElim;
+  bool HasDivModLibcall;
   bool AsmVerbosityDefault(false);
 }
 
@@ -205,6 +206,10 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
   cl::desc("Use strong PHI elimination."),
   cl::location(StrongPHIElim),
   cl::init(false));
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+  cl::desc("Emit a call to trap function rather than a trap instruction"),
+  cl::init(""));
 static cl::opt<bool>
 DataSections("fdata-sections",
   cl::desc("Emit data into separate sections"),
@@ -221,7 +226,9 @@ TargetMachine::TargetMachine(const Target &T)
   : TheTarget(T), AsmInfo(0),
     MCRelaxAll(false),
     MCNoExecStack(false),
-    MCUseLoc(true) {
+    MCSaveTempLabels(false),
+    MCUseLoc(true),
+    MCUseCFI(true) {
   // Typically it will be subtargets that will adjust FloatABIType from Default
   // to Soft or Hard.
   if (UseSoftFloat)
@@ -303,4 +310,11 @@ namespace llvm {
   bool HonorSignDependentRoundingFPMath() {
     return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
   }
+
+  /// getTrapFunctionName - If this returns a non-empty string, this means isel
+  /// should lower Intrinsic::trap to a call to the specified function name
+  /// instead of an ISD::TRAP node.
+  StringRef getTrapFunctionName() {
+    return TrapFuncName;
+  }
 }
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8fe549b..c352bfc 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -53,6 +53,14 @@ private:
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                                MCStreamer &Out);
 
+  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
+  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+  bool isSrcOp(X86Operand &Op);
+
+  /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+  /// or %es:(%edi) in 32bit mode.
+  bool isDstOp(X86Operand &Op);
+
   /// @name Auto-generated Matcher Functions
   /// {
 
@@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand {
 
 } // end anonymous namespace.
 
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+  unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+
+  return (Op.isMem() &&
+    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+    isa<MCConstantExpr>(Op.Mem.Disp) &&
+    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+  unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+
+  return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+    isa<MCConstantExpr>(Op.Mem.Disp) &&
+    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
 
 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
                                     SMLoc &StartLoc, SMLoc &EndLoc) {
@@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       delete &Op;
     }
   }
-  
+  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+  if (Name.startswith("ins") && Operands.size() == 3 &&
+      (Name == "insb" || Name == "insw" || Name == "insl")) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+
+  // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
+  if (Name.startswith("outs") && Operands.size() == 3 &&
+      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+
+  // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+  if (Name.startswith("movs") && Operands.size() == 3 &&
+      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+       (Is64Bit && Name == "movsq"))) {
+    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+    if (isSrcOp(Op) && isDstOp(Op2)) {
+      Operands.pop_back();
+      Operands.pop_back();
+      delete &Op;
+      delete &Op2;
+    }
+  }
+  // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+  if (Name.startswith("lods") && Operands.size() == 3 &&
+      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+       Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+    if (isSrcOp(*Op1) && Op2->isReg()) {
+      const char *ins;
+      unsigned reg = Op2->getReg();
+      bool isLods = Name == "lods";
+      if (reg == X86::AL && (isLods || Name == "lodsb"))
+        ins = "lodsb";
+      else if (reg == X86::AX && (isLods || Name == "lodsw"))
+        ins = "lodsw";
+      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+        ins = "lodsl";
+      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+        ins = "lodsq";
+      else
+        ins = NULL;
+      if (ins != NULL) {
+        Operands.pop_back();
+        Operands.pop_back();
+        delete Op1;
+        delete Op2;
+        if (Name != ins)
+          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+      }
+    }
+  }
+  // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+  if (Name.startswith("stos") && Operands.size() == 3 &&
+      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+       Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+    if (isDstOp(*Op2) && Op1->isReg()) {
+      const char *ins;
+      unsigned reg = Op1->getReg();
+      bool isStos = Name == "stos";
+      if (reg == X86::AL && (isStos || Name == "stosb"))
+        ins = "stosb";
+      else if (reg == X86::AX && (isStos || Name == "stosw"))
+        ins = "stosw";
+      else if (reg == X86::EAX && (isStos || Name == "stosl"))
+        ins = "stosl";
+      else if (reg == X86::RAX && (isStos || Name == "stosq"))
+        ins = "stosq";
+      else
+        ins = NULL;
+      if (ins != NULL) {
+        Operands.pop_back();
+        Operands.pop_back();
+        delete Op1;
+        delete Op2;
+        if (Name != ins)
+          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+      }
+    }
+  }
+
   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
   // "shift <op>".
   if ((Name.startswith("shr") || Name.startswith("sar") ||
@@ -803,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       Operands.erase(Operands.begin() + 1);
     }
   }
+  
+  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
+  // instalias with an immediate operand yet.
+  if (Name == "int" && Operands.size() == 2) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+        cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+      delete Operands[1];
+      Operands.erase(Operands.begin() + 1);
+      static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+    }
+  }
 
   return false;
 }
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f777756..d8a105e 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_XMM32:
   case TYPE_XMM64:
   case TYPE_XMM128:
+  case TYPE_XMM256:
   case TYPE_DEBUGREG:
   case TYPE_CONTROLREG:
     return translateRMRegister(mcInst, insn);
@@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_M32:
   case TYPE_M64:
   case TYPE_M128:
+  case TYPE_M256:
   case TYPE_M512:
   case TYPE_Mv:
   case TYPE_M32FP:
@@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
   case ENCODING_Rv:
     translateRegister(mcInst, insn.opcodeRegister);
     return false;
+  case ENCODING_VVVV:
+    translateRegister(mcInst, insn.vvvv);
+    return false;
   case ENCODING_DUP:
     return translateOperand(mcInst,
                             insn.spec->operands[operand.type - TYPE_DUP0],
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b6546fc..de1610b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -75,6 +75,12 @@ static int modRMRequired(OpcodeType type,
   case THREEBYTE_3A:
     decision = &THREEBYTE3A_SYM;
     break;
+  case THREEBYTE_A6:
+    decision = &THREEBYTEA6_SYM;
+    break;
+  case THREEBYTE_A7:
+    decision = &THREEBYTEA7_SYM;
+    break;
   }
   
   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
@@ -115,6 +121,12 @@ static InstrUID decode(OpcodeType type,
   case THREEBYTE_3A:
     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
     break;
+  case THREEBYTE_A6:
+    dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case THREEBYTE_A7:
+    dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
   }
   
   switch (dec->modrm_type) {
@@ -368,29 +380,109 @@ static int readPrefixes(struct InternalInstruction* insn) {
     if (isPrefix)
       dbgprintf(insn, "Found prefix 0x%hhx", byte);
   }
+    
+  insn->vexSize = 0;
   
-  if (insn->mode == MODE_64BIT) {
-    if ((byte & 0xf0) == 0x40) {
-      uint8_t opcodeByte;
+  if (byte == 0xc4) {
+    uint8_t byte1;
       
-      if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
-        dbgprintf(insn, "Redundant REX prefix");
-        return -1;
+    if (lookAtByte(insn, &byte1)) {
+      dbgprintf(insn, "Couldn't read second byte of VEX");
+      return -1;
+    }
+    
+    if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+      insn->vexSize = 3;
+      insn->necessaryPrefixLocation = insn->readerCursor - 1;
+    }
+    else {
+      unconsumeByte(insn);
+      insn->necessaryPrefixLocation = insn->readerCursor - 1;
+    }
+    
+    if (insn->vexSize == 3) {
+      insn->vexPrefix[0] = byte;
+      consumeByte(insn, &insn->vexPrefix[1]);
+      consumeByte(insn, &insn->vexPrefix[2]);
+
+      /* We simulate the REX prefix for simplicity's sake */
+    
+      insn->rexPrefix = 0x40 
+                      | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+                      | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+                      | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+                      | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+    
+      switch (ppFromVEX3of3(insn->vexPrefix[2]))
+      {
+      default:
+        break;
+      case VEX_PREFIX_66:
+        hasOpSize = TRUE;      
+        break;
       }
+    
+      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+    }
+  }
+  else if (byte == 0xc5) {
+    uint8_t byte1;
+    
+    if (lookAtByte(insn, &byte1)) {
+      dbgprintf(insn, "Couldn't read second byte of VEX");
+      return -1;
+    }
       
-      insn->rexPrefix = byte;
-      insn->necessaryPrefixLocation = insn->readerCursor - 2;
-      
-      dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
-    } else {                
+    if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+      insn->vexSize = 2;
+    }
+    else {
+      unconsumeByte(insn);
+    }
+    
+    if (insn->vexSize == 2) {
+      insn->vexPrefix[0] = byte;
+      consumeByte(insn, &insn->vexPrefix[1]);
+        
+      insn->rexPrefix = 0x40 
+                      | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+        
+      switch (ppFromVEX2of2(insn->vexPrefix[1]))
+      {
+      default:
+        break;
+      case VEX_PREFIX_66:
+        hasOpSize = TRUE;      
+        break;
+      }
+         
+      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+    }
+  }
+  else {
+    if (insn->mode == MODE_64BIT) {
+      if ((byte & 0xf0) == 0x40) {
+        uint8_t opcodeByte;
+          
+        if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+          dbgprintf(insn, "Redundant REX prefix");
+          return -1;
+        }
+          
+        insn->rexPrefix = byte;
+        insn->necessaryPrefixLocation = insn->readerCursor - 2;
+          
+        dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+      } else {                
+        unconsumeByte(insn);
+        insn->necessaryPrefixLocation = insn->readerCursor - 1;
+      }
+    } else {
       unconsumeByte(insn);
       insn->necessaryPrefixLocation = insn->readerCursor - 1;
     }
-  } else {
-    unconsumeByte(insn);
-    insn->necessaryPrefixLocation = insn->readerCursor - 1;
   }
-  
+
   if (insn->mode == MODE_16BIT) {
     insn->registerSize       = (hasOpSize ? 4 : 2);
     insn->addressSize        = (hasAdSize ? 4 : 2);
@@ -438,6 +530,39 @@ static int readOpcode(struct InternalInstruction* insn) {
   dbgprintf(insn, "readOpcode()");
   
   insn->opcodeType = ONEBYTE;
+    
+  if (insn->vexSize == 3)
+  {
+    switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+    {
+    default:
+      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+      return -1;      
+    case 0:
+      break;
+    case VEX_LOB_0F:
+      insn->twoByteEscape = 0x0f;
+      insn->opcodeType = TWOBYTE;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F38:
+      insn->twoByteEscape = 0x0f;
+      insn->threeByteEscape = 0x38;
+      insn->opcodeType = THREEBYTE_38;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F3A:    
+      insn->twoByteEscape = 0x0f;
+      insn->threeByteEscape = 0x3a;
+      insn->opcodeType = THREEBYTE_3A;
+      return consumeByte(insn, &insn->opcode);
+    }
+  }
+  else if (insn->vexSize == 2)
+  {
+    insn->twoByteEscape = 0x0f;
+    insn->opcodeType = TWOBYTE;
+    return consumeByte(insn, &insn->opcode);
+  }
+    
   if (consumeByte(insn, &current))
     return -1;
   
@@ -467,6 +592,24 @@ static int readOpcode(struct InternalInstruction* insn) {
         return -1;
       
       insn->opcodeType = THREEBYTE_3A;
+    } else if (current == 0xa6) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+      
+      insn->threeByteEscape = current;
+      
+      if (consumeByte(insn, &current))
+        return -1;
+      
+      insn->opcodeType = THREEBYTE_A6;
+    } else if (current == 0xa7) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+      
+      insn->threeByteEscape = current;
+      
+      if (consumeByte(insn, &current))
+        return -1;
+      
+      insn->opcodeType = THREEBYTE_A7;
     } else {
       dbgprintf(insn, "Didn't find a three-byte escape prefix");
       
@@ -600,20 +743,64 @@ static int getID(struct InternalInstruction* insn) {
   dbgprintf(insn, "getID()");
     
   attrMask = ATTR_NONE;
-  
+
   if (insn->mode == MODE_64BIT)
     attrMask |= ATTR_64BIT;
-  
-  if (insn->rexPrefix & 0x08)
-    attrMask |= ATTR_REXW;
-  
-  if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
-    attrMask |= ATTR_OPSIZE;
-  else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
-    attrMask |= ATTR_XS;
-  else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
-    attrMask |= ATTR_XD;
-  
+    
+  if (insn->vexSize) {
+    attrMask |= ATTR_VEX;
+
+    if (insn->vexSize == 3) {
+      switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+      case VEX_PREFIX_66:
+        attrMask |= ATTR_OPSIZE;    
+        break;
+      case VEX_PREFIX_F3:
+        attrMask |= ATTR_XS;
+        break;
+      case VEX_PREFIX_F2:
+        attrMask |= ATTR_XD;
+        break;
+      }
+    
+      if (wFromVEX3of3(insn->vexPrefix[2]))
+        attrMask |= ATTR_REXW;
+      if (lFromVEX3of3(insn->vexPrefix[2]))
+        attrMask |= ATTR_VEXL;
+    }
+    else if (insn->vexSize == 2) {
+      switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+      case VEX_PREFIX_66:
+        attrMask |= ATTR_OPSIZE;    
+        break;
+      case VEX_PREFIX_F3:
+        attrMask |= ATTR_XS;
+        break;
+      case VEX_PREFIX_F2:
+        attrMask |= ATTR_XD;
+        break;
+      }
+    
+      if (lFromVEX2of2(insn->vexPrefix[1]))
+        attrMask |= ATTR_VEXL;
+    }
+    else {
+      return -1;
+    }
+  }
+  else {
+    if (insn->rexPrefix & 0x08)
+      attrMask |= ATTR_REXW;
+  
+    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+      attrMask |= ATTR_OPSIZE;
+    else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+      attrMask |= ATTR_XS;
+    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+      attrMask |= ATTR_XD;
+    
+  }
+
   if (getIDWithAttrMask(&instructionID, insn, attrMask))
     return -1;
   
@@ -749,7 +936,7 @@ static int readSIB(struct InternalInstruction* insn) {
     insn->sibIndex = SIB_INDEX_NONE;
     break;
   default:
-    insn->sibIndex = (EABase)(sibIndexBase + index);
+    insn->sibIndex = (SIBIndex)(sibIndexBase + index);
     if (insn->sibIndex == SIB_INDEX_sib ||
         insn->sibIndex == SIB_INDEX_sib64)
       insn->sibIndex = SIB_INDEX_NONE;
@@ -796,7 +983,7 @@ static int readSIB(struct InternalInstruction* insn) {
     }
     break;
   default:
-    insn->sibBase = (EABase)(sibBaseBase + base);
+    insn->sibBase = (SIBBase)(sibBaseBase + base);
     break;
   }
   
@@ -1012,6 +1199,8 @@ static int readModRM(struct InternalInstruction* insn) {
       return prefix##_EAX + index;                        \
     case TYPE_R64:                                        \
       return prefix##_RAX + index;                        \
+    case TYPE_XMM256:                                     \
+      return prefix##_YMM0 + index;                       \
     case TYPE_XMM128:                                     \
     case TYPE_XMM64:                                      \
     case TYPE_XMM32:                                      \
@@ -1073,6 +1262,14 @@ static int fixupReg(struct InternalInstruction *insn,
   default:
     debug("Expected a REG or R/M encoding in fixupReg");
     return -1;
+  case ENCODING_VVVV:
+    insn->vvvv = (Reg)fixupRegValue(insn,
+                                    (OperandType)op->type,
+                                    insn->vvvv,
+                                    &valid);
+    if (!valid)
+      return -1;
+    break;
   case ENCODING_REG:
     insn->reg = (Reg)fixupRegValue(insn,
                                    (OperandType)op->type,
@@ -1237,6 +1434,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
 }
 
 /*
+ * readVVVV - Consumes an immediate operand from an instruction, given the
+ *   desired operand size.
+ *
+ * @param insn  - The instruction whose operand is to be read.
+ * @return      - 0 if the immediate was successfully consumed; nonzero
+ *                otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readVVVV()");
+        
+  if (insn->vexSize == 3)
+    insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+  else if (insn->vexSize == 2)
+    insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+  else
+    return -1;
+
+  return 0;
+}
+
+/*
  * readOperands - Consults the specifier for an instruction and consumes all
  *   operands for that instruction, interpreting them as it goes.
  *
@@ -1317,6 +1535,13 @@ static int readOperands(struct InternalInstruction* insn) {
     case ENCODING_I:
       if (readOpcodeModifier(insn))
         return -1;
+      break;
+    case ENCODING_VVVV:
+      if (readVVVV(insn))
+        return -1;
+      if (fixupReg(insn, &insn->spec->operands[index]))
+        return -1;
+      break;
     case ENCODING_DUP:
       break;
     default:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index d0dc8b5..a9c90f8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -34,16 +34,30 @@ extern "C" {
 /*
  * Accessor functions for various fields of an Intel instruction
  */
-#define modFromModRM(modRM)  ((modRM & 0xc0) >> 6)
-#define regFromModRM(modRM)  ((modRM & 0x38) >> 3)
-#define rmFromModRM(modRM)   (modRM & 0x7)
-#define scaleFromSIB(sib)    ((sib & 0xc0) >> 6)
-#define indexFromSIB(sib)    ((sib & 0x38) >> 3)
-#define baseFromSIB(sib)     (sib & 0x7)
-#define wFromREX(rex)        ((rex & 0x8) >> 3)
-#define rFromREX(rex)        ((rex & 0x4) >> 2)
-#define xFromREX(rex)        ((rex & 0x2) >> 1)
-#define bFromREX(rex)        (rex & 0x1)
+#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM)   ((modRM) & 0x7)
+#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib)     ((sib) & 0x7)
+#define wFromREX(rex)        (((rex) & 0x8) >> 3)
+#define rFromREX(rex)        (((rex) & 0x4) >> 2)
+#define xFromREX(rex)        (((rex) & 0x2) >> 1)
+#define bFromREX(rex)        ((rex) & 0x1)
+    
+#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
+#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex)      ((vex) & 0x3)
+
+#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex)      ((vex) & 0x3)
 
 /*
  * These enums represent Intel registers for use by the decoder.
@@ -206,7 +220,25 @@ extern "C" {
   ENTRY(XMM13)    \
   ENTRY(XMM14)    \
   ENTRY(XMM15)
-  
+
+#define REGS_YMM  \
+  ENTRY(YMM0)     \
+  ENTRY(YMM1)     \
+  ENTRY(YMM2)     \
+  ENTRY(YMM3)     \
+  ENTRY(YMM4)     \
+  ENTRY(YMM5)     \
+  ENTRY(YMM6)     \
+  ENTRY(YMM7)     \
+  ENTRY(YMM8)     \
+  ENTRY(YMM9)     \
+  ENTRY(YMM10)    \
+  ENTRY(YMM11)    \
+  ENTRY(YMM12)    \
+  ENTRY(YMM13)    \
+  ENTRY(YMM14)    \
+  ENTRY(YMM15)
+    
 #define REGS_SEGMENT \
   ENTRY(ES)          \
   ENTRY(CS)          \
@@ -252,6 +284,7 @@ extern "C" {
   REGS_64BIT          \
   REGS_MMX            \
   REGS_XMM            \
+  REGS_YMM            \
   REGS_SEGMENT        \
   REGS_DEBUG          \
   REGS_CONTROL        \
@@ -332,6 +365,27 @@ typedef enum {
   SEG_OVERRIDE_GS,
   SEG_OVERRIDE_max
 } SegmentOverride;
+    
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+  VEX_LOB_0F = 0x1,
+  VEX_LOB_0F38 = 0x2,
+  VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+  VEX_PREFIX_NONE = 0x0,
+  VEX_PREFIX_66 = 0x1,
+  VEX_PREFIX_F3 = 0x2,
+  VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
 
 typedef uint8_t BOOL;
 
@@ -389,10 +443,12 @@ struct InternalInstruction {
   uint8_t prefixPresent[0x100];
   /* contains the location (for use with the reader) of the prefix byte */
   uint64_t prefixLocations[0x100];
+  /* The value of the VEX prefix, if present */
+  uint8_t vexPrefix[3];
+  /* The length of the VEX prefix (0 if not present) */
+  uint8_t vexSize;
   /* The value of the REX prefix, if present */
   uint8_t rexPrefix;
-  /* The location of the REX prefix */
-  uint64_t rexLocation;
   /* The location where a mandatory prefix would have to be (i.e., right before
      the opcode, or right before the REX prefix if one is present) */
   uint64_t necessaryPrefixLocation;
@@ -428,6 +484,10 @@ struct InternalInstruction {
   /* state for additional bytes, consumed during operand decode.  Pattern:
      consumed___ indicates that the byte was already consumed and does not
      need to be consumed again */
+
+  /* The VEX.vvvv field, which contains a third register operand for some AVX
+     instructions */
+  Reg                           vvvv;
   
   /* The ModR/M byte, which contains most register operands and some portion of
      all memory operands */
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 1425b86..70315ed 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -30,6 +30,8 @@
 #define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
 #define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
 #define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes
 
 #define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
 #define CONTEXTS_STR      "x86DisassemblerContexts"
@@ -37,6 +39,8 @@
 #define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
 #define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
 #define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"
 
 /*
  * Attributes of an instruction that must be known before the opcode can be
@@ -49,7 +53,9 @@
   ENUM_ENTRY(ATTR_XS,     0x02) \
   ENUM_ENTRY(ATTR_XD,     0x04) \
   ENUM_ENTRY(ATTR_REXW,   0x08) \
-  ENUM_ENTRY(ATTR_OPSIZE, 0x10)
+  ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+  ENUM_ENTRY(ATTR_VEX,    0x20) \
+  ENUM_ENTRY(ATTR_VEXL,   0x40)
 
 #define ENUM_ENTRY(n, v) n = v,
 enum attributeBits {
@@ -87,7 +93,20 @@ enum attributeBits {
                                         "IC_64BIT_REXW_XS")                    \
   ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                         "else because this changes most "      \
-                                        "operands' meaning")
+                                        "operands' meaning")                   \
+  ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
+  ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
+  ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
+  ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
+  ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
+  ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
+  ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
+  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
+  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
+  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
+  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XS prefix")\
+  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")
+
 
 #define ENUM_ENTRY(n, r, d) n,    
 typedef enum {
@@ -104,7 +123,9 @@ typedef enum {
   ONEBYTE       = 0,
   TWOBYTE       = 1,
   THREEBYTE_38  = 2,
-  THREEBYTE_3A  = 3
+  THREEBYTE_3A  = 3,
+  THREEBYTE_A6  = 4,
+  THREEBYTE_A7  = 5
 } OpcodeType;
 
 /*
@@ -183,6 +204,7 @@ struct ContextDecision {
   ENUM_ENTRY(ENCODING_NONE,   "")                                              \
   ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
   ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
+  ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
   ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
   ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
   ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
@@ -278,6 +300,7 @@ struct ContextDecision {
   ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
   ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
   ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
+  ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
   ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index d6950f4..dd6e353 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "asm-printer"
 #include "X86ATTInstPrinter.h"
 #include "X86InstComments.h"
+#include "X86Subtarget.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
@@ -22,24 +23,38 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "X86GenInstrNames.inc"
+#include <map>
 using namespace llvm;
 
 // Include the auto-generated portion of the assembly writer.
 #define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenRegisterNames.inc"
 #include "X86GenAsmWriter.inc"
+#undef PRINT_ALIAS_INSTR
+#undef GET_INSTRUCTION_NAME
+
+X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+  : MCInstPrinter(MAI) {
+  // Initialize the set of available features.
+  setAvailableFeatures(ComputeAvailableFeatures(
+            &TM.getSubtarget<X86Subtarget>()));
+}
 
 void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
-  printInstruction(MI, OS);
+  // Try to print any aliases first.
+  if (!printAliasInstr(MI, OS))
+    printInstruction(MI, OS);
   
   // If verbose assembly is enabled, we can print some informative comments.
   if (CommentStream)
     EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
 }
+
 StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
   return getInstructionName(Opcode);
 }
 
-
 void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
                                    raw_ostream &O) {
   switch (MI->getOperand(Op).getImm()) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index eb98664..8d69391 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -17,16 +17,24 @@
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
-  class MCOperand;
+
+class MCOperand;
+class X86Subtarget;
+class TargetMachine;
   
 class X86ATTInstPrinter : public MCInstPrinter {
 public:
-  X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
+  X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
   
   virtual void printInst(const MCInst *MI, raw_ostream &OS);
   virtual StringRef getOpcodeName(unsigned Opcode) const;
 
+  // Methods used to print the alias of an instruction.
+  unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
+  // Autogenerated by tblgen, returns true if we successfully printed an
+  // alias.
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI, raw_ostream &OS);
   static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 12144e3..c642acc 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     // FALL THROUGH.
   case X86::PUNPCKLBWrm:
     Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(16, ShuffleMask);
+    DecodePUNPCKLBWMask(16, ShuffleMask);
     break;
   case X86::PUNPCKLWDrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::PUNPCKLWDrm:
     Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(8, ShuffleMask);
+    DecodePUNPCKLWDMask(8, ShuffleMask);
     break;
   case X86::PUNPCKLDQrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::PUNPCKLDQrm:
     Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(4, ShuffleMask);
+    DecodePUNPCKLDQMask(4, ShuffleMask);
     break;
   case X86::PUNPCKLQDQrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::PUNPCKLQDQrm:
     Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(2, ShuffleMask);
+    DecodePUNPCKLQDQMask(2, ShuffleMask);
     break;
 
   case X86::SHUFPDrri:
@@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKLPDrm:
-    DecodeUNPCKLPMask(2, ShuffleMask);
+    DecodeUNPCKLPDMask(2, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKLPDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKLPDrm:
+    DecodeUNPCKLPDMask(2, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKLPDYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKLPDYrm:
+    DecodeUNPCKLPDMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::UNPCKLPSrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
   case X86::UNPCKLPSrm:
-    DecodeUNPCKLPMask(4, ShuffleMask);
+    DecodeUNPCKLPSMask(4, ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VUNPCKLPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKLPSrm:
+    DecodeUNPCKLPSMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+  case X86::VUNPCKLPSYrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::VUNPCKLPSYrm:
+    DecodeUNPCKLPSMask(8, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
   case X86::UNPCKHPDrr:
     Src2Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 0484529..47253eb 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "asm-printer"
 #include "X86IntelInstPrinter.h"
 #include "X86InstComments.h"
+#include "X86Subtarget.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6f12032..ca99dc0 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -18,13 +18,15 @@
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
-  class MCOperand;
+
+class MCOperand;
+class TargetMachine;
   
 class X86IntelInstPrinter : public MCInstPrinter {
 public:
-  X86IntelInstPrinter(const MCAsmInfo &MAI)
+  X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
     : MCInstPrinter(MAI) {}
-  
+
   virtual void printInst(const MCInst *MI, raw_ostream &OS);
   virtual StringRef getOpcodeName(unsigned Opcode) const;
   
@@ -33,7 +35,6 @@ public:
   static const char *getRegisterName(unsigned RegNo);
   static const char *getInstructionName(unsigned Opcode);
 
-
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
   void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index e21d69a..e7429a3 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -36,7 +36,7 @@ _conv:
 	cmovb %rcx, %rax
 	ret
 
-Seems like the jb branch has high likelyhood of being taken. It would have
+Seems like the jb branch has high likelihood of being taken. It would have
 saved a few instructions.
 
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index abd1515..ea3014e 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -7,14 +7,6 @@ copy (3-addr bswap + memory support?)  This is available on Atom processors.
 
 //===---------------------------------------------------------------------===//
 
-CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move.  The X86
-backend knows how to three-addressify this shift, but it appears the register
-allocator isn't even asking it to do so in this case.  We should investigate
-why this isn't happening, it could have significant impact on other important
-cases for X86 as well.
-
-//===---------------------------------------------------------------------===//
-
 This should be one DIV/IDIV instruction, not a libcall:
 
 unsigned test(unsigned long long X, unsigned Y) {
@@ -1572,7 +1564,7 @@ Implement processor-specific optimizations for parity with GCC on these
 processors.  GCC does two optimizations:
 
 1. ix86_pad_returns inserts a noop before ret instructions if immediately
-   preceeded by a conditional branch or is the target of a jump.
+   preceded by a conditional branch or is the target of a jump.
 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
    code contains more than 3 branches.
    
@@ -1656,28 +1648,61 @@ information to add the "lock" prefix.
 
 //===---------------------------------------------------------------------===//
 
-_Bool bar(int *x) { return *x & 1; }
+struct B {
+  unsigned char y0 : 1;
+};
 
-define zeroext i1 @bar(i32* nocapture %x) nounwind readonly {
-entry:
-  %tmp1 = load i32* %x                            ; <i32> [#uses=1]
-  %and = and i32 %tmp1, 1                         ; <i32> [#uses=1]
-  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
-  ret i1 %tobool
+int bar(struct B* a) { return a->y0; }
+
+define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize {
+  %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0
+  %2 = load i8* %1, align 1
+  %3 = and i8 %2, 1
+  %4 = zext i8 %3 to i32
+  ret i32 %4
 }
 
-bar:                                                        # @bar
-# BB#0:                                                     # %entry
-	movl	4(%esp), %eax
-	movb	(%eax), %al
-	andb	$1, %al
-	movzbl	%al, %eax
-	ret
+bar:                                    # @bar
+# BB#0:
+        movb    (%rdi), %al
+        andb    $1, %al
+        movzbl  %al, %eax
+        ret
 
 Missed optimization: should be movl+andl.
 
 //===---------------------------------------------------------------------===//
 
+The x86_64 abi says:
+
+Booleans, when stored in a memory object, are stored as single byte objects the
+value of which is always 0 (false) or 1 (true).
+
+We are not using this fact:
+
+int bar(_Bool *a) { return *a; }
+
+define i32 @bar(i8* nocapture %a) nounwind readonly optsize {
+  %1 = load i8* %a, align 1, !tbaa !0
+  %tmp = and i8 %1, 1
+  %2 = zext i8 %tmp to i32
+  ret i32 %2
+}
+
+bar:
+        movb    (%rdi), %al
+        andb    $1, %al
+        movzbl  %al, %eax
+        ret
+
+GCC produces
+
+bar:
+        movzbl  (%rdi), %eax
+        ret
+
+//===---------------------------------------------------------------------===//
+
 Consider the following two functions compiled with clang:
 _Bool foo(int *x) { return !(*x & 4); }
 unsigned bar(int *x) { return !(*x & 4); }
@@ -1703,26 +1728,6 @@ are functionally identical.
 //===---------------------------------------------------------------------===//
 
 Take the following C code:
-int x(int y) { return (y & 63) << 14; }
-
-Code produced by gcc:
-	andl	$63, %edi
-	sall	$14, %edi
-	movl	%edi, %eax
-	ret
-
-Code produced by clang:
-	shll	$14, %edi
-	movl	%edi, %eax
-	andl	$1032192, %eax
-	ret
-
-The code produced by gcc is 3 bytes shorter.  This sort of construct often
-shows up with bitfields.
-
-//===---------------------------------------------------------------------===//
-
-Take the following C code:
 int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
 
 We generate the following IR with clang:
@@ -1947,3 +1952,91 @@ which is "perfect".
 
 //===---------------------------------------------------------------------===//
 
+For the branch in the following code:
+int a();
+int b(int x, int y) {
+  if (x & (1<<(y&7)))
+    return a();
+  return y;
+}
+
+We currently generate:
+	movb	%sil, %al
+	andb	$7, %al
+	movzbl	%al, %eax
+	btl	%eax, %edi
+	jae	.LBB0_2
+
+movl+andl would be shorter than the movb+andb+movzbl sequence.
+
+//===---------------------------------------------------------------------===//
+
+For the following:
+struct u1 {
+    float x, y;
+};
+float foo(struct u1 u) {
+    return u.x + u.y;
+}
+
+We currently generate:
+	movdqa	%xmm0, %xmm1
+	pshufd	$1, %xmm0, %xmm0        # xmm0 = xmm0[1,0,0,0]
+	addss	%xmm1, %xmm0
+	ret
+
+We could save an instruction here by commuting the addss.
+
+//===---------------------------------------------------------------------===//
+
+This (from PR9661):
+
+float clamp_float(float a) {
+        if (a > 1.0f)
+                return 1.0f;
+        else if (a < 0.0f)
+                return 0.0f;
+        else
+                return a;
+}
+
+Could compile to:
+
+clamp_float:                            # @clamp_float
+        movss   .LCPI0_0(%rip), %xmm1
+        minss   %xmm1, %xmm0
+        pxor    %xmm1, %xmm1
+        maxss   %xmm1, %xmm0
+        ret
+
+with -ffast-math.
+
+//===---------------------------------------------------------------------===//
+
+This function (from PR9803):
+
+int clamp2(int a) {
+        if (a > 5)
+                a = 5;
+        if (a < 0) 
+                return 0;
+        return a;
+}
+
+Compiles to:
+
+_clamp2:                                ## @clamp2
+        pushq   %rbp
+        movq    %rsp, %rbp
+        cmpl    $5, %edi
+        movl    $5, %ecx
+        cmovlel %edi, %ecx
+        testl   %ecx, %ecx
+        movl    $0, %eax
+        cmovnsl %ecx, %eax
+        popq    %rbp
+        ret
+
+The move of 0 could be scheduled above the test to make it is xor reg,reg.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1287977..cd06060 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -1,4 +1,4 @@
-//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm,
   ShuffleMask.push_back(7);
 }
 
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+                          SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i);
-    ShuffleMask.push_back(i+NElts);
-  }
+  DecodeUNPCKLPMask(VT, ShuffleMask);
 }
 
 void DecodePUNPCKHMask(unsigned NElts,
@@ -133,15 +150,40 @@ void DecodeUNPCKHPMask(unsigned NElts,
   }
 }
 
+void DecodeUNPCKLPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask) {
+  DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
 
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc.  NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc.  VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i);        // Reads from dest
-    ShuffleMask.push_back(i+NElts);  // Reads from src
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Handle vector lengths > 128 bits.  Define a "section" as a set of
+  // 128 bits.  AVX defines UNPCK* to operate independently on 128-bit
+  // sections.
+  unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0 ) NumSections = 1;  // Handle MMX
+  unsigned NumSectionElts = NumElts / NumSections;
+
+  unsigned Start = 0;
+  unsigned End = NumSectionElts / 2;
+  for (unsigned s = 0; s < NumSections; ++s) {
+    for (unsigned i = Start; i != End; ++i) {
+      ShuffleMask.push_back(i);                 // Reads from dest/src1
+      ShuffleMask.push_back(i+NumSectionElts);  // Reads from src/src2
+    }
+    // Process the next 128 bits.
+    Start += NumSectionElts;
+    End += NumSectionElts;
   }
 }
 
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 50d9ccb..b18f670 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -16,6 +16,7 @@
 #define X86_SHUFFLE_DECODE_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
 
 //===----------------------------------------------------------------------===//
 //  Vector Mask Decoding
@@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm,
 void DecodePSHUFLWMask(unsigned Imm,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+                          SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
 void DecodePUNPCKHMask(unsigned NElts,
@@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
 void DecodeUNPCKHPMask(unsigned NElts,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
+void DecodeUNPCKLPSMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+                        SmallVectorImpl<unsigned> &ShuffleMask);
 
 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc.  NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc.  VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
 } // llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index efb6c8c..25b8d3e 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,13 +1,13 @@
 //===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
-// This is a target description file for the Intel i386 architecture, refered to
+// This is a target description file for the Intel i386 architecture, referred to
 // here as the "X86" architecture.
 //
 //===----------------------------------------------------------------------===//
@@ -32,7 +32,7 @@ def FeatureMMX     : SubtargetFeature<"mmx","X86SSELevel", "MMX",
 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                       "Enable SSE instructions",
                                       // SSE codegen depends on cmovs, and all
-                                      // SSE1+ processors support them. 
+                                      // SSE1+ processors support them.
                                       [FeatureMMX, FeatureCMOV]>;
 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                       "Enable SSE2 instructions",
@@ -50,7 +50,8 @@ def FeatureSSE42   : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
                                       "Enable SSE 4.2 instructions",
                                       [FeatureSSE41, FeaturePOPCNT]>;
 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
-                                      "Enable 3DNow! instructions">;
+                                      "Enable 3DNow! instructions",
+                                      [FeatureMMX]>;
 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                       "Enable 3DNow! Athlon instructions",
                                       [Feature3DNow]>;
@@ -125,10 +126,10 @@ def : Proc<"sandybridge",     [FeatureSSE42, Feature64Bit,
                                FeatureAES, FeatureCLMUL]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
-def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;
-def : Proc<"k6-3",            [FeatureMMX,    Feature3DNow]>;
-def : Proc<"athlon",          [FeatureMMX,    Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird",    [FeatureMMX,    Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2",            [Feature3DNow]>;
+def : Proc<"k6-3",            [Feature3DNow]>;
+def : Proc<"athlon",          [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird",    [Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-4",        [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-xp",       [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-mp",       [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +157,8 @@ def : Proc<"shanghai",        [Feature3DNowA, Feature64Bit, FeatureSSE4A,
                                Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureMMX]>;
-def : Proc<"winchip2",        [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3",              [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2",        [Feature3DNow]>;
+def : Proc<"c3",              [Feature3DNow]>;
 def : Proc<"c3-2",            [FeatureSSE1]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index da5f5b1..4d7d96d 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -28,6 +29,13 @@
 #include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
 
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+         cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
 static unsigned getFixupKindLog2Size(unsigned Kind) {
   switch (Kind) {
   default: assert(0 && "invalid fixup kind!");
@@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
   if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
     return true;
 
+  if (MCDisableArithRelaxation)
+    return false;
+
   // Check if this instruction is ever relaxable.
   if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
     return false;
@@ -307,10 +318,13 @@ public:
     : ELFX86AsmBackend(T, OSType) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
-                                                        ELF::EM_386, false),
+    return createELFObjectWriter(createELFObjectTargetWriter(),
                                  OS, /*IsLittleEndian*/ true);
   }
+
+  MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+    return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+  }
 };
 
 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
@@ -319,10 +333,13 @@ public:
     : ELFX86AsmBackend(T, OSType) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
-                                                        ELF::EM_X86_64, true),
+    return createELFObjectWriter(createELFObjectTargetWriter(),
                                  OS, /*IsLittleEndian*/ true);
   }
+
+  MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+    return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+  }
 };
 
 class WindowsX86AsmBackend : public X86AsmBackend {
@@ -408,34 +425,26 @@ public:
 
 TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                                const std::string &TT) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
     return new DarwinX86_32AsmBackend(T);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
-    if (Triple(TT).getEnvironment() == Triple::MachO)
-      return new DarwinX86_32AsmBackend(T);
-    else
-      return new WindowsX86AsmBackend(T, false);
-  default:
-    return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
-  }
+
+  if (TheTriple.isOSWindows())
+    return new WindowsX86AsmBackend(T, false);
+
+  return new ELFX86_32AsmBackend(T, TheTriple.getOS());
 }
 
 TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                                const std::string &TT) {
-  switch (Triple(TT).getOS()) {
-  case Triple::Darwin:
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
     return new DarwinX86_64AsmBackend(T);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
-    if (Triple(TT).getEnvironment() == Triple::MachO)
-      return new DarwinX86_64AsmBackend(T);
-    else
-      return new WindowsX86AsmBackend(T, true);
-  default:
-    return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
-  }
+
+  if (TheTriple.isOSWindows())
+    return new WindowsX86AsmBackend(T, true);
+
+  return new ELFX86_64AsmBackend(T, TheTriple.getOS());
 }
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 99b4479..c2d53c4 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,12 +709,13 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
 //===----------------------------------------------------------------------===//
 
 static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new X86ATTInstPrinter(MAI);
+    return new X86ATTInstPrinter(TM, MAI);
   if (SyntaxVariant == 1)
-    return new X86IntelInstPrinter(MAI);
+    return new X86IntelInstPrinter(TM, MAI);
   return 0;
 }
 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a44fb69..5635175 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
+  
+  // Do not pass the sret argument in RCX, the Win64 thiscall calling
+  // convention requires "this" to be passed in RCX.                                        
+  CCIfCC<"CallingConv::X86_ThisCall", 
+    CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8  , R9  ],
+                                                     [XMM1, XMM2, XMM3]>>>>,
+
   CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8  , R9  ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
 
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 60d9d4a..421e221 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -652,6 +652,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
   case X86II::TB:  // Two-byte opcode prefix
   case X86II::T8:  // 0F 38
   case X86II::TA:  // 0F 3A
+  case X86II::A6:  // 0F A6
+  case X86II::A7:  // 0F A7
     Need0FPrefix = true;
     break;
   case X86II::TF: // F2 0F 38
@@ -695,6 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
   case X86II::TA:    // 0F 3A
     MCE.emitByte(0x3A);
     break;
+  case X86II::A6:    // 0F A6
+    MCE.emitByte(0xA6);
+    break;
+  case X86II::A7:    // 0F A7
+    MCE.emitByte(0xA7);
+    break;
   }
 
   // If this is a two-address instruction, skip one of the register operands.
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6fa9284..1382f18 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,6 +23,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -77,10 +78,8 @@ private:
 
   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
 
-  bool X86FastEmitStore(EVT VT, const Value *Val,
-                        const X86AddressMode &AM);
-  bool X86FastEmitStore(EVT VT, unsigned Val,
-                        const X86AddressMode &AM);
+  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
 
   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                          unsigned &ResultReg);
@@ -125,6 +124,8 @@ private:
 
   unsigned TargetMaterializeAlloca(const AllocaInst *C);
 
+  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
   /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
   /// computed in an SSE register, not on the X87 floating point stack.
   bool isScalarFPTypeInSSEReg(EVT VT) const {
@@ -133,6 +134,9 @@ private:
   }
 
   bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+                          X86AddressMode SrcAM, uint64_t Len);
 };
 
 } // end anonymous namespace.
@@ -224,8 +228,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
 /// and a displacement offset, or a GlobalAddress,
 /// i.e. V. Return true if it is possible.
 bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
-                              const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
   // Get opcode and regclass of the output for the given store instruction.
   unsigned Opc = 0;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -395,37 +398,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       const Value *Op = *i;
       if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
         const StructLayout *SL = TD.getStructLayout(STy);
-        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
-        Disp += SL->getElementOffset(Idx);
-      } else {
-        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
-        SmallVector<const Value *, 4> Worklist;
-        Worklist.push_back(Op);
-        do {
-          Op = Worklist.pop_back_val();
-          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
-            // Constant-offset addressing.
-            Disp += CI->getSExtValue() * S;
-          } else if (isa<AddOperator>(Op) &&
-                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
-            // An add with a constant operand. Fold the constant.
-            ConstantInt *CI =
-              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
-            Disp += CI->getSExtValue() * S;
-            // Add the other operand back to the work list.
-            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
-          } else if (IndexReg == 0 &&
-                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
-                     (S == 1 || S == 2 || S == 4 || S == 8)) {
-            // Scaled-index addressing.
-            Scale = S;
-            IndexReg = getRegForGEPIndex(Op).first;
-            if (IndexReg == 0)
-              return false;
-          } else
-            // Unsupported.
-            goto unsupported_gep;
-        } while (!Worklist.empty());
+        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+        continue;
+      }
+      
+      // A array/variable index is always of the form i*S where S is the
+      // constant scale size.  See if we can push the scale into immediates.
+      uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+      for (;;) {
+        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+          // Constant-offset addressing.
+          Disp += CI->getSExtValue() * S;
+          break;
+        }
+        if (isa<AddOperator>(Op) &&
+            (!isa<Instruction>(Op) ||
+             FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+               == FuncInfo.MBB) &&
+            isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+          // An add (in the same block) with a constant operand. Fold the
+          // constant.
+          ConstantInt *CI =
+            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+          Disp += CI->getSExtValue() * S;
+          // Iterate on the other operand.
+          Op = cast<AddOperator>(Op)->getOperand(0);
+          continue;
+        }
+        if (IndexReg == 0 &&
+            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+            (S == 1 || S == 2 || S == 4 || S == 8)) {
+          // Scaled-index addressing.
+          Scale = S;
+          IndexReg = getRegForGEPIndex(Op).first;
+          if (IndexReg == 0)
+            return false;
+          break;
+        }
+        // Unsupported.
+        goto unsupported_gep;
       }
     }
     // Check for displacement overflow.
@@ -439,7 +450,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
     if (X86SelectAddress(U->getOperand(0), AM))
       return true;
 
-    // If we couldn't merge the sub value into this addr mode, revert back to
+    // If we couldn't merge the gep value into this addr mode, revert back to
     // our address and just match the value instead of completely failing.
     AM = SavedAM;
     break;
@@ -451,91 +462,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
 
   // Handle constant address.
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
-    // Can't handle alternate code models yet.
+    // Can't handle alternate code models or TLS yet.
     if (TM.getCodeModel() != CodeModel::Small)
       return false;
 
-    // RIP-relative addresses can't have additional register operands.
-    if (Subtarget->isPICStyleRIPRel() &&
-        (AM.Base.Reg != 0 || AM.IndexReg != 0))
-      return false;
-
-    // Can't handle TLS yet.
     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
       if (GVar->isThreadLocal())
         return false;
+    
+    // RIP-relative addresses can't have additional register operands, so if
+    // we've already folded stuff into the addressing mode, just force the
+    // global value into its own register, which we can use as the basereg.
+    if (!Subtarget->isPICStyleRIPRel() ||
+        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+      // Okay, we've committed to selecting this global. Set up the address.
+      AM.GV = GV;
+
+      // Allow the subtarget to classify the global.
+      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+      // If this reference is relative to the pic base, set it now.
+      if (isGlobalRelativeToPICBase(GVFlags)) {
+        // FIXME: How do we know Base.Reg is free??
+        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+      }
 
-    // Okay, we've committed to selecting this global. Set up the basic address.
-    AM.GV = GV;
-
-    // Allow the subtarget to classify the global.
-    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
-    // If this reference is relative to the pic base, set it now.
-    if (isGlobalRelativeToPICBase(GVFlags)) {
-      // FIXME: How do we know Base.Reg is free??
-      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
-    }
-
-    // Unless the ABI requires an extra load, return a direct reference to
-    // the global.
-    if (!isGlobalStubReference(GVFlags)) {
-      if (Subtarget->isPICStyleRIPRel()) {
-        // Use rip-relative addressing if we can.  Above we verified that the
-        // base and index registers are unused.
-        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
-        AM.Base.Reg = X86::RIP;
+      // Unless the ABI requires an extra load, return a direct reference to
+      // the global.
+      if (!isGlobalStubReference(GVFlags)) {
+        if (Subtarget->isPICStyleRIPRel()) {
+          // Use rip-relative addressing if we can.  Above we verified that the
+          // base and index registers are unused.
+          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+          AM.Base.Reg = X86::RIP;
+        }
+        AM.GVOpFlags = GVFlags;
+        return true;
       }
-      AM.GVOpFlags = GVFlags;
-      return true;
-    }
 
-    // Ok, we need to do a load from a stub.  If we've already loaded from this
-    // stub, reuse the loaded pointer, otherwise emit the load now.
-    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
-    unsigned LoadReg;
-    if (I != LocalValueMap.end() && I->second != 0) {
-      LoadReg = I->second;
-    } else {
-      // Issue load from stub.
-      unsigned Opc = 0;
-      const TargetRegisterClass *RC = NULL;
-      X86AddressMode StubAM;
-      StubAM.Base.Reg = AM.Base.Reg;
-      StubAM.GV = GV;
-      StubAM.GVOpFlags = GVFlags;
-
-      // Prepare for inserting code in the local-value area.
-      SavePoint SaveInsertPt = enterLocalValueArea();
-
-      if (TLI.getPointerTy() == MVT::i64) {
-        Opc = X86::MOV64rm;
-        RC  = X86::GR64RegisterClass;
-
-        if (Subtarget->isPICStyleRIPRel())
-          StubAM.Base.Reg = X86::RIP;
+      // Ok, we need to do a load from a stub.  If we've already loaded from
+      // this stub, reuse the loaded pointer, otherwise emit the load now.
+      DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+      unsigned LoadReg;
+      if (I != LocalValueMap.end() && I->second != 0) {
+        LoadReg = I->second;
       } else {
-        Opc = X86::MOV32rm;
-        RC  = X86::GR32RegisterClass;
-      }
+        // Issue load from stub.
+        unsigned Opc = 0;
+        const TargetRegisterClass *RC = NULL;
+        X86AddressMode StubAM;
+        StubAM.Base.Reg = AM.Base.Reg;
+        StubAM.GV = GV;
+        StubAM.GVOpFlags = GVFlags;
+
+        // Prepare for inserting code in the local-value area.
+        SavePoint SaveInsertPt = enterLocalValueArea();
+
+        if (TLI.getPointerTy() == MVT::i64) {
+          Opc = X86::MOV64rm;
+          RC  = X86::GR64RegisterClass;
+
+          if (Subtarget->isPICStyleRIPRel())
+            StubAM.Base.Reg = X86::RIP;
+        } else {
+          Opc = X86::MOV32rm;
+          RC  = X86::GR32RegisterClass;
+        }
 
-      LoadReg = createResultReg(RC);
-      MachineInstrBuilder LoadMI =
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
-      addFullAddress(LoadMI, StubAM);
+        LoadReg = createResultReg(RC);
+        MachineInstrBuilder LoadMI =
+          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+        addFullAddress(LoadMI, StubAM);
 
-      // Ok, back to normal mode.
-      leaveLocalValueArea(SaveInsertPt);
+        // Ok, back to normal mode.
+        leaveLocalValueArea(SaveInsertPt);
 
-      // Prevent loading GV stub multiple times in same MBB.
-      LocalValueMap[V] = LoadReg;
-    }
+        // Prevent loading GV stub multiple times in same MBB.
+        LocalValueMap[V] = LoadReg;
+      }
 
-    // Now construct the final address. Note that the Disp, Scale,
-    // and Index values may already be set here.
-    AM.Base.Reg = LoadReg;
-    AM.GV = 0;
-    return true;
+      // Now construct the final address. Note that the Disp, Scale,
+      // and Index values may already be set here.
+      AM.Base.Reg = LoadReg;
+      AM.GV = 0;
+      return true;
+    }
   }
 
   // If all else fails, try to materialize the value in a register.
@@ -856,12 +867,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
 
     unsigned NEReg = createResultReg(&X86::GR8RegClass);
     unsigned PReg = createResultReg(&X86::GR8RegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::SETNEr), NEReg);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::SETPr), PReg);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::OR8rr), ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
       .addReg(PReg).addReg(NEReg);
     UpdateValueMap(I, ResultReg);
     return true;
@@ -1059,14 +1067,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
         }
       }
     }
+  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+    // typically happen for _Bool and C++ bools.
+    MVT SourceVT;
+    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+      unsigned TestOpc = 0;
+      switch (SourceVT.SimpleTy) {
+      default: break;
+      case MVT::i8:  TestOpc = X86::TEST8ri; break;
+      case MVT::i16: TestOpc = X86::TEST16ri; break;
+      case MVT::i32: TestOpc = X86::TEST32ri; break;
+      case MVT::i64: TestOpc = X86::TEST64ri32; break;
+      }
+      if (TestOpc) {
+        unsigned OpReg = getRegForValue(TI->getOperand(0));
+        if (OpReg == 0) return false;
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+          .addReg(OpReg).addImm(1);
+        
+        unsigned JmpOpc = X86::JNE_4;
+        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+          std::swap(TrueMBB, FalseMBB);
+          JmpOpc = X86::JE_4;
+        }
+        
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+          .addMBB(TrueMBB);
+        FastEmitBranch(FalseMBB, DL);
+        FuncInfo.MBB->addSuccessor(TrueMBB);
+        return true;
+      }
+    }
   }
 
   // Otherwise do a clumsy setcc and re-test it.
+  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+  // in an explicit cast, so make sure to handle that correctly.
   unsigned OpReg = getRegForValue(BI->getCondition());
   if (OpReg == 0) return false;
 
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
-    .addReg(OpReg).addReg(OpReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+    .addReg(OpReg).addImm(1);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
     .addMBB(TrueMBB);
   FastEmitBranch(FalseMBB, DL);
@@ -1075,42 +1118,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
 }
 
 bool X86FastISel::X86SelectShift(const Instruction *I) {
-  unsigned CReg = 0, OpReg = 0, OpImm = 0;
+  unsigned CReg = 0, OpReg = 0;
   const TargetRegisterClass *RC = NULL;
   if (I->getType()->isIntegerTy(8)) {
     CReg = X86::CL;
     RC = &X86::GR8RegClass;
     switch (I->getOpcode()) {
-    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
-    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
-    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
     default: return false;
     }
   } else if (I->getType()->isIntegerTy(16)) {
     CReg = X86::CX;
     RC = &X86::GR16RegClass;
     switch (I->getOpcode()) {
-    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
-    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
-    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
     default: return false;
     }
   } else if (I->getType()->isIntegerTy(32)) {
     CReg = X86::ECX;
     RC = &X86::GR32RegClass;
     switch (I->getOpcode()) {
-    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
-    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
-    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
     default: return false;
     }
   } else if (I->getType()->isIntegerTy(64)) {
     CReg = X86::RCX;
     RC = &X86::GR64RegClass;
     switch (I->getOpcode()) {
-    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
-    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
-    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
     default: return false;
     }
   } else {
@@ -1124,15 +1167,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   if (Op0Reg == 0) return false;
 
-  // Fold immediate in shl(x,3).
-  if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-    unsigned ResultReg = createResultReg(RC);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
-            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
-    UpdateValueMap(I, ResultReg);
-    return true;
-  }
-
   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   if (Op1Reg == 0) return false;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1294,10 +1328,61 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
   return false;
 }
 
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+                                     X86AddressMode SrcAM, uint64_t Len) {
+  // Make sure we don't bloat code by inlining very large memcpy's.
+  bool i64Legal = TLI.isTypeLegal(MVT::i64);
+  if (Len > (i64Legal ? 32 : 16)) return false;
+
+  // We don't care about alignment here since we just emit integer accesses.
+  while (Len) {
+    MVT VT;
+    if (Len >= 8 && i64Legal)
+      VT = MVT::i64;
+    else if (Len >= 4)
+      VT = MVT::i32;
+    else if (Len >= 2)
+      VT = MVT::i16;
+    else {
+      assert(Len == 1);
+      VT = MVT::i8;
+    }
+
+    unsigned Reg;
+    bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+    RV &= X86FastEmitStore(VT, Reg, DestAM);
+    assert(RV && "Failed to emit load or store??");
+
+    unsigned Size = VT.getSizeInBits()/8;
+    Len -= Size;
+    DestAM.Disp += Size;
+    SrcAM.Disp += Size;
+  }
+
+  return true;
+}
+
 bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
+  case Intrinsic::memcpy: {
+    const MemCpyInst &MCI = cast<MemCpyInst>(I);
+    // Don't handle volatile or variable length memcpys.
+    if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+      return false;
+
+    uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+    
+    // Get the address of the dest and source addresses.
+    X86AddressMode DestAM, SrcAM;
+    if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+        !X86SelectAddress(MCI.getRawSource(), SrcAM))
+      return false;
+
+    return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+  }
+      
   case Intrinsic::stackprotector: {
     // Emit code inline code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
@@ -1308,17 +1393,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     // Grab the frame index.
     X86AddressMode AM;
     if (!X86SelectAddress(Slot, AM)) return false;
-
     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
     return true;
   }
   case Intrinsic::objectsize: {
-    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+    // FIXME: This should be moved to generic code!
+    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
     const Type *Ty = I.getCalledFunction()->getReturnType();
 
-    assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
     MVT VT;
     if (!isTypeLegal(Ty, VT))
       return false;
@@ -1356,6 +1438,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   }
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow: {
+    // FIXME: Should fold immediates.
+    
     // Replace "add with overflow" intrinsics with an "add" instruction followed
     // by a seto/setc instruction. Later on, when the "extractvalue"
     // instructions are encountered, we use the fact that two registers were
@@ -1427,8 +1511,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   // Handle only C and fastcc calling conventions for now.
   ImmutableCallSite CS(CI);
   CallingConv::ID CC = CS.getCallingConv();
-  if (CC != CallingConv::C &&
-      CC != CallingConv::Fast &&
+  if (CC != CallingConv::C && CC != CallingConv::Fast &&
       CC != CallingConv::X86_FastCall)
     return false;
 
@@ -1437,14 +1520,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
     return false;
 
-  // Let SDISel handle vararg functions.
   const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  if (FTy->isVarArg())
+  bool isVarArg = FTy->isVarArg();
+
+  // Don't know how to handle Win64 varargs yet.  Nothing special needed for
+  // x86-32.  Special handling for x86-64 is implemented.
+  if (isVarArg && Subtarget->isTargetWin64())
     return false;
 
   // Fast-isel doesn't know about callee-pop yet.
-  if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+  if (Subtarget->IsCalleePop(isVarArg, CC))
     return false;
 
   // Handle *simple* calls for now.
@@ -1487,9 +1573,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   ArgFlags.reserve(CS.arg_size());
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
-    unsigned Arg = getRegForValue(*i);
-    if (Arg == 0)
-      return false;
+    Value *ArgVal = *i;
     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1497,34 +1581,67 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
       Flags.setZExt();
 
+    // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+    // instruction.  This is safe because it is common to all fastisel supported
+    // calling conventions on x86.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+      if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+          CI->getBitWidth() == 16) {
+        if (Flags.isSExt())
+          ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+        else
+          ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+      }
+    }
+    
+    unsigned ArgReg;
+    
+    // Passing bools around ends up doing a trunc to i1 and passing it.
+    // Codegen this as an argument + "and 1".
+    if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+        cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+        ArgVal->hasOneUse()) {
+      ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+      ArgReg = getRegForValue(ArgVal);
+      if (ArgReg == 0) return false;
+      
+      MVT ArgVT;
+      if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+      
+      ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+                           ArgVal->hasOneUse(), 1);
+    } else {
+      ArgReg = getRegForValue(ArgVal);
+    }
+
+    if (ArgReg == 0) return false;
+
     // FIXME: Only handle *easy* calls for now.
     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
-        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
         CS.paramHasAttr(AttrInd, Attribute::ByVal))
       return false;
 
-    const Type *ArgTy = (*i)->getType();
+    const Type *ArgTy = ArgVal->getType();
     MVT ArgVT;
     if (!isTypeLegal(ArgTy, ArgVT))
       return false;
     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
     Flags.setOrigAlign(OriginalAlignment);
 
-    Args.push_back(Arg);
-    ArgVals.push_back(*i);
+    Args.push_back(ArgReg);
+    ArgVals.push_back(ArgVal);
     ArgVTs.push_back(ArgVT);
     ArgFlags.push_back(Flags);
   }
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+  CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext());
 
   // Allocate shadow area for Win64
-  if (Subtarget->isTargetWin64()) {
+  if (Subtarget->isTargetWin64())
     CCInfo.AllocateStack(32, 8);
-  }
 
   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
 
@@ -1618,6 +1735,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
             X86::EBX).addReg(Base);
   }
 
+  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+    // Count the number of XMM registers allocated.
+    static const unsigned XMMArgRegs[] = {
+      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+    };
+    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+            X86::AL).addImm(NumXMMRegs);
+  }
+
   // Issue the call.
   MachineInstrBuilder MIB;
   if (CalleeOp) {
@@ -1656,7 +1784,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
                (GV->isDeclaration() || GV->isWeakForLinker()) &&
-               Subtarget->getDarwinVers() < 9) {
+               (!Subtarget->getTargetTriple().isMacOSX() ||
+                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -1672,14 +1801,20 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   if (Subtarget->isPICStyleGOT())
     MIB.addReg(X86::EBX);
 
+  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+    MIB.addReg(X86::AL);
+
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
     MIB.addReg(RegArgs[i]);
 
   // Issue CALLSEQ_END
   unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+  unsigned NumBytesCallee = 0;
+  if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+    NumBytesCallee = 4;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
-    .addImm(NumBytes).addImm(0);
+    .addImm(NumBytes).addImm(NumBytesCallee);
 
   // Now handle call return value (if any).
   SmallVector<unsigned, 4> UsedRegs;
@@ -1850,10 +1985,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
   if (isa<GlobalValue>(C)) {
     X86AddressMode AM;
     if (X86SelectAddress(C, AM)) {
-      if (TLI.getPointerTy() == MVT::i32)
-        Opc = X86::LEA32r;
-      else
-        Opc = X86::LEA64r;
+      // If the expression is just a basereg, then we're done, otherwise we need
+      // to emit an LEA.
+      if (AM.BaseType == X86AddressMode::RegBase &&
+          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+        return AM.Base.Reg;
+      
+      Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
       unsigned ResultReg = createResultReg(RC);
       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), ResultReg), AM);
@@ -1915,6 +2053,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   return ResultReg;
 }
 
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+  MVT VT;
+  if (!isTypeLegal(CF->getType(), VT))
+    return false;
+
+  // Get opcode and regclass for the given zero.
+  unsigned Opc = 0;
+  const TargetRegisterClass *RC = NULL;
+  switch (VT.SimpleTy) {
+    default: return false;
+    case MVT::f32:
+      if (Subtarget->hasSSE1()) {
+        Opc = X86::FsFLD0SS;
+        RC  = X86::FR32RegisterClass;
+      } else {
+        Opc = X86::LD_Fp032;
+        RC  = X86::RFP32RegisterClass;
+      }
+      break;
+    case MVT::f64:
+      if (Subtarget->hasSSE2()) {
+        Opc = X86::FsFLD0SD;
+        RC  = X86::FR64RegisterClass;
+      } else {
+        Opc = X86::LD_Fp064;
+        RC  = X86::RFP64RegisterClass;
+      }
+      break;
+    case MVT::f80:
+      // No f80 support yet.
+      return false;
+  }
+
+  unsigned ResultReg = createResultReg(RC);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+  return ResultReg;
+}
+
+
 /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 3aaa693..325d061 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
     // set up by FpSET_ST0, and our StackTop is off by one because of it.
     unsigned Op0 = getFPReg(MI->getOperand(0));
     // Restore the actual StackTop from before Fp_SET_ST0.
-    // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we
+    // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
     // are not enforcing the constraint.
     ++StackTop;
     unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 0a3f931..06d12fc 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
@@ -296,7 +297,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
   // FIXME: This is dirty hack. The code itself is pretty mess right now.
   // It should be rewritten from scratch and generalized sometimes.
 
-  // Determine maximum offset (minumum due to stack growth).
+  // Determine maximum offset (minimum due to stack growth).
   int64_t MaxOffset = 0;
   for (std::vector<CalleeSavedInfo>::const_iterator
          I = CSI.begin(), E = CSI.end(); I != E; ++I)
@@ -551,65 +552,71 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   // responsible for adjusting the stack pointer.  Touching the stack at 4K
   // increments is necessary to ensure that the guard pages used by the OS
   // virtual memory manager are allocated in correct sequence.
-  if (NumBytes >= 4096 &&
-      (STI.isTargetCygMing() || STI.isTargetWin32()) &&
-      !STI.isTargetEnvMacho()) {
+  if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+    const char *StackProbeSymbol;
+    bool isSPUpdateNeeded = false;
+
+    if (Is64Bit) {
+      if (STI.isTargetCygMing())
+        StackProbeSymbol = "___chkstk";
+      else {
+        StackProbeSymbol = "__chkstk";
+        isSPUpdateNeeded = true;
+      }
+    } else if (STI.isTargetCygMing())
+      StackProbeSymbol = "_alloca";
+    else
+      StackProbeSymbol = "_chkstk";
+
     // Check whether EAX is livein for this function.
     bool isEAXAlive = isEAXLiveIn(MF);
 
-    const char *StackProbeSymbol =
-      STI.isTargetWindows() ? "_chkstk" : "_alloca";
-    if (Is64Bit && STI.isTargetCygMing())
-      StackProbeSymbol = "__chkstk";
-    unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
-    if (!isEAXAlive) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-        .addImm(NumBytes);
-      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
-        .addExternalSymbol(StackProbeSymbol)
-        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-    } else {
+    if (isEAXAlive) {
+      // Sanity check that EAX is not livein for this function.
+      // It should not be, so throw an assert.
+      assert(!Is64Bit && "EAX is livein in x64 case!");
+
       // Save EAX
       BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
         .addReg(X86::EAX, RegState::Kill);
+    }
 
-      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
-      // allocated bytes for EAX.
+    if (Is64Bit) {
+      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+      // Function prologue is responsible for adjusting the stack pointer.
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+        .addImm(NumBytes);
+    } else {
+      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
+      // We'll also use 4 already allocated bytes for EAX.
       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-        .addImm(NumBytes - 4);
-      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
-        .addExternalSymbol(StackProbeSymbol)
-        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
-      // Restore EAX
-      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
-                                              X86::EAX),
-                                      StackPtr, false, NumBytes - 4);
-      MBB.insert(MBBI, MI);
+        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+    }
+
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+      .addExternalSymbol(StackProbeSymbol)
+      .addReg(StackPtr,    RegState::Define | RegState::Implicit)
+      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+    // MSVC x64's __chkstk needs to adjust %rsp.
+    // FIXME: %rax preserves the offset and should be available.
+    if (isSPUpdateNeeded)
+      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+                   TII, *RegInfo);
+
+    if (isEAXAlive) {
+        // Restore EAX
+        MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+                                                X86::EAX),
+                                        StackPtr, false, NumBytes - 4);
+        MBB.insert(MBBI, MI);
     }
-  } else if (NumBytes >= 4096 &&
-             STI.isTargetWin64() &&
-             !STI.isTargetEnvMacho()) {
-    // Sanity check that EAX is not livein for this function.  It should
-    // not be, so throw an assert.
-    assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
-
-    // Handle the 64-bit Windows ABI case where we need to call __chkstk.
-    // Function prologue is responsible for adjusting the stack pointer.
-    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-      .addImm(NumBytes);
-    BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
-      .addExternalSymbol("__chkstk")
-      .addReg(StackPtr, RegState::Define | RegState::Implicit);
-    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
-                 TII, *RegInfo);
   } else if (NumBytes)
     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                  TII, *RegInfo);
 
-  if ((NumBytes || PushedRegs) && needsFrameMoves) {
+  if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
     // Mark end of stack pointer adjustment.
     MCSymbol *Label = MMI.getContext().CreateTempSymbol();
     BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
@@ -779,7 +786,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     assert(Offset >= 0 && "Offset should never be negative");
 
     if (Offset) {
-      // Check for possible merge with preceeding ADD instruction.
+      // Check for possible merge with preceding ADD instruction.
       Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
       emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
     }
@@ -823,7 +830,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     int delta = -1*X86FI->getTCReturnAddrDelta();
     MBBI = MBB.getLastNonDebugInstr();
 
-    // Check for possible merge with preceeding ADD instruction.
+    // Check for possible merge with preceding ADD instruction.
     delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
     emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
   }
@@ -892,7 +899,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction &MF = *MBB.getParent();
 
-  bool isWin64 = STI.isTargetWin64();
   unsigned SlotSize = STI.is64Bit() ? 8 : 4;
   unsigned FPReg = TRI->getFrameRegister(MF);
   unsigned CalleeFrameSize = 0;
@@ -900,25 +906,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
+  // Push GPRs. It increases frame size.
   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+    if (!X86::GR64RegClass.contains(Reg) &&
+        !X86::GR32RegClass.contains(Reg))
+      continue;
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     if (Reg == FPReg)
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      CalleeFrameSize += SlotSize;
-      BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
-                              RC, TRI);
-    }
+    CalleeFrameSize += SlotSize;
+    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
   }
 
   X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+  // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
+  // It can be done by spilling XMMs to stack frame.
+  // Note that only Win64 ABI might spill XMMs.
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (X86::GR64RegClass.contains(Reg) ||
+        X86::GR32RegClass.contains(Reg))
+      continue;
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+                            RC, TRI);
+  }
+
   return true;
 }
 
@@ -933,21 +953,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // Reload XMMs from stack frame.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (X86::GR64RegClass.contains(Reg) ||
+        X86::GR32RegClass.contains(Reg))
+      continue;
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                             RC, TRI);
+  }
+
+  // POP GPRs.
   unsigned FPReg = TRI->getFrameRegister(MF);
-  bool isWin64 = STI.isTargetWin64();
   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
+    if (!X86::GR64RegClass.contains(Reg) &&
+        !X86::GR32RegClass.contains(Reg))
+      continue;
     if (Reg == FPReg)
       // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
       continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                               RC, TRI);
-    }
+    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
   }
   return true;
 }
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b0ec6e..4534e85 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       return RetVal;
     break;
   }
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    // For operations of the form (x << C1) op C2, check if we can use a smaller
+    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+      break;
+
+    // i8 is unshrinkable, i16 should be promoted to i32.
+    if (NVT != MVT::i32 && NVT != MVT::i64)
+      break;
+
+    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (!Cst || !ShlCst)
+      break;
+
+    int64_t Val = Cst->getSExtValue();
+    uint64_t ShlVal = ShlCst->getZExtValue();
+
+    // Make sure that we don't change the operation by removing bits.
+    // This only matters for OR and XOR, AND is unaffected.
+    if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+      break;
+
+    unsigned ShlOp, Op = 0;
+    EVT CstVT = NVT;
+
+    // Check the minimum bitwidth for the new constant.
+    // TODO: AND32ri is the same as AND64ri32 with zext imm.
+    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+      CstVT = MVT::i8;
+    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+      CstVT = MVT::i32;
+
+    // Bail if there is no smaller encoding.
+    if (NVT == CstVT)
+      break;
+
+    switch (NVT.getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unsupported VT!");
+    case MVT::i32:
+      assert(CstVT == MVT::i8);
+      ShlOp = X86::SHL32ri;
+
+      switch (Opcode) {
+      case ISD::AND: Op = X86::AND32ri8; break;
+      case ISD::OR:  Op =  X86::OR32ri8; break;
+      case ISD::XOR: Op = X86::XOR32ri8; break;
+      }
+      break;
+    case MVT::i64:
+      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+      ShlOp = X86::SHL64ri;
+
+      switch (Opcode) {
+      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
+      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+      }
+      break;
+    }
+
+    // Emit the smaller op and the shift.
+    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+    return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+                                getI8Imm(ShlVal));
+    break;
+  }
   case X86ISD::UMUL: {
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2f49dbc..703c01d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,6 +45,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -221,7 +222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   // X86 is weird, it always uses i8 for shift amounts and setcc results.
   setBooleanContents(ZeroOrOneBooleanContent);
-  setSchedulingPreference(Sched::RegPressure);
+    
+  // For 64-bit since we have so many registers use the ILP scheduler, for
+  // 32-bit code use the register pressure specific scheduling.
+  if (Subtarget->is64Bit())
+    setSchedulingPreference(Sched::ILP);
+  else
+    setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
 
   if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
@@ -543,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
-  if (Subtarget->is64Bit())
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-  if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
-  else
-    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC,
+                     (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
+                     (Subtarget->isTargetCOFF()
+                      && !Subtarget->isTargetEnvMacho()
+                      ? Custom : Expand));
 
   if (!UseSoftFloat && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
@@ -921,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     // Can turn SHL into an integer multiply.
     setOperationAction(ISD::SHL,                MVT::v4i32, Custom);
     setOperationAction(ISD::SHL,                MVT::v16i8, Custom);
+    setOperationAction(ISD::SRL,                MVT::v4i32, Legal);
 
     // i8 and i16 vectors are custom , because the source register and source
     // source memory operand types are not the same width.  f32 vectors are
@@ -1271,27 +1278,6 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
   return std::make_pair(RRC, Cost);
 }
 
-// FIXME: Why this routine is here? Move to RegInfo!
-unsigned
-X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
-                                       MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
-  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
-  switch (RC->getID()) {
-  default:
-    return 0;
-  case X86::GR32RegClassID:
-    return 4 - FPDiff;
-  case X86::GR64RegClassID:
-    return 8 - FPDiff;
-  case X86::VR128RegClassID:
-    return Subtarget->is64Bit() ? 10 : 4;
-  case X86::VR64RegClassID:
-    return 4;
-  }
-}
-
 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                                unsigned &Offset) const {
   if (!Subtarget->isTargetLinux())
@@ -1463,6 +1449,20 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
   return HasRet;
 }
 
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+                                            ISD::NodeType ExtendKind) const {
+  MVT ReturnMVT;
+  // TODO: Is this also valid on 32-bit?
+  if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+    ReturnMVT = MVT::i8;
+  else
+    ReturnMVT = MVT::i32;
+
+  EVT MinVT = getRegisterType(Context, ReturnMVT);
+  return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
 /// LowerCallResult - Lower the result values of a call into the
 /// appropriate copies out of appropriate physical registers.
 ///
@@ -1595,6 +1595,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
   return (CC == CallingConv::Fast || CC == CallingConv::GHC);
 }
 
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!CI->isTailCall())
+    return false;
+
+  CallSite CS(CI);
+  CallingConv::ID CalleeCC = CS.getCallingConv();
+  if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+    return false;
+
+  return true;
+}
+
 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
 /// a tailcall target by changing its ABI.
 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
@@ -1627,8 +1639,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
   // In case of tail call optimization mark all arguments mutable. Since they
   // could be overwritten by lowering of arguments in case of a tail call.
   if (Flags.isByVal()) {
-    int FI = MFI->CreateFixedObject(Flags.getByValSize(),
-                                    VA.getLocMemOffset(), isImmutable);
+    unsigned Bytes = Flags.getByValSize();
+    if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+    int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
     return DAG.getFrameIndex(FI, getPointerTy());
   } else {
     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
@@ -1765,8 +1778,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
-                    CallConv != CallingConv::X86_ThisCall))) {
+    if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+                    CallConv != CallingConv::X86_ThisCall)) {
       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
     }
     if (Is64Bit) {
@@ -1818,7 +1831,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
         FuncInfo->setRegSaveFrameIndex(
           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
-        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+        // Fixup to set vararg frame on shadow area (4 x i64).
+        if (NumIntRegs < 4)
+          FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
       } else {
         // For X86-64, if there are vararg parameters that are passed via
         // registers, then we must store them to their spots on the stack so they
@@ -1937,7 +1952,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
   return SDValue(OutRetAddr.getNode(), 1);
 }
 
-/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
 /// optimization is performed and it is required (FPDiff!=0).
 static SDValue
 EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
@@ -2028,7 +2043,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 
   SDValue RetAddrFrIdx;
-  // Load return adress for tail calls.
+  // Load return address for tail calls.
   if (isTailCall && FPDiff)
     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
                                     Is64Bit, FPDiff, dl);
@@ -2185,7 +2200,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     SmallVector<SDValue, 8> MemOpChains2;
     SDValue FIN;
     int FI = 0;
-    // Do not flag preceeding copytoreg stuff together with the following stuff.
+    // Do not flag preceding copytoreg stuff together with the following stuff.
     InFlag = SDValue();
     if (GuaranteedTailCallOpt) {
       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2266,7 +2281,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         OpFlags = X86II::MO_PLT;
       } else if (Subtarget->isPICStyleStubAny() &&
                  (GV->isDeclaration() || GV->isWeakForLinker()) &&
-                 Subtarget->getDarwinVers() < 9) {
+                 (!Subtarget->getTargetTriple().isMacOSX() ||
+                  Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
         // PC-relative references to external symbols should go through $stub,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
@@ -2285,7 +2301,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         getTargetMachine().getRelocationModel() == Reloc::PIC_) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
-               Subtarget->getDarwinVers() < 9) {
+               (!Subtarget->getTargetTriple().isMacOSX() ||
+                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -3173,7 +3190,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
 bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
   unsigned NumElems = N->getValueType(0).getVectorNumElements();
 
-  if (NumElems != 2 && NumElems != 4)
+  if ((NumElems != 2 && NumElems != 4)
+      || N->getValueType(0).getSizeInBits() > 128)
     return false;
 
   for (unsigned i = 0; i < NumElems/2; ++i)
@@ -3195,19 +3213,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
     return false;
 
-  for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
-    int BitI  = Mask[i];
-    int BitI1 = Mask[i+1];
-    if (!isUndefOrEqual(BitI, j))
-      return false;
-    if (V2IsSplat) {
-      if (!isUndefOrEqual(BitI1, NumElts))
-        return false;
-    } else {
-      if (!isUndefOrEqual(BitI1, j + NumElts))
+  // Handle vector lengths > 128 bits.  Define a "section" as a set of
+  // 128 bits.  AVX defines UNPCK* to operate independently on 128-bit
+  // sections.
+  unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0 ) NumSections = 1;  // Handle MMX
+  unsigned NumSectionElts = NumElts / NumSections;
+
+  unsigned Start = 0;
+  unsigned End = NumSectionElts;
+  for (unsigned s = 0; s < NumSections; ++s) {
+    for (unsigned i = Start, j = s * NumSectionElts;
+         i != End;
+         i += 2, ++j) {
+      int BitI  = Mask[i];
+      int BitI1 = Mask[i+1];
+      if (!isUndefOrEqual(BitI, j))
         return false;
+      if (V2IsSplat) {
+        if (!isUndefOrEqual(BitI1, NumElts))
+          return false;
+      } else {
+        if (!isUndefOrEqual(BitI1, j + NumElts))
+          return false;
+      }
     }
+    // Process the next 128 bits.
+    Start += NumSectionElts;
+    End += NumSectionElts;
   }
+
   return true;
 }
 
@@ -3255,14 +3290,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
 
-  for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
-    int BitI  = Mask[i];
-    int BitI1 = Mask[i+1];
-    if (!isUndefOrEqual(BitI, j))
-      return false;
-    if (!isUndefOrEqual(BitI1, j))
-      return false;
+  // Handle vector lengths > 128 bits.  Define a "section" as a set of
+  // 128 bits.  AVX defines UNPCK* to operate independently on 128-bit
+  // sections.
+  unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0 ) NumSections = 1;  // Handle MMX
+  unsigned NumSectionElts = NumElems / NumSections;
+
+  for (unsigned s = 0; s < NumSections; ++s) {
+    for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+         i != NumSectionElts * (s + 1);
+         i += 2, ++j) {
+      int BitI  = Mask[i];
+      int BitI1 = Mask[i+1];
+
+      if (!isUndefOrEqual(BitI, j))
+        return false;
+      if (!isUndefOrEqual(BitI1, j))
+        return false;
+    }
   }
+
   return true;
 }
 
@@ -3846,8 +3894,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
 
 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
 /// element of the result of the vector shuffle.
-SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
-                            unsigned Depth) {
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+                                   unsigned Depth) {
   if (Depth == 6)
     return SDValue();  // Limit search depth.
 
@@ -3895,11 +3943,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
     case X86ISD::PUNPCKLWD:
     case X86ISD::PUNPCKLDQ:
     case X86ISD::PUNPCKLQDQ:
-      DecodePUNPCKLMask(NumElems, ShuffleMask);
+      DecodePUNPCKLMask(VT, ShuffleMask);
       break;
     case X86ISD::UNPCKLPS:
     case X86ISD::UNPCKLPD:
-      DecodeUNPCKLPMask(NumElems, ShuffleMask);
+    case X86ISD::VUNPCKLPS:
+    case X86ISD::VUNPCKLPD:
+    case X86ISD::VUNPCKLPSY:
+    case X86ISD::VUNPCKLPDY:
+      DecodeUNPCKLPMask(VT, ShuffleMask);
       break;
     case X86ISD::MOVHLPS:
       DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -3968,7 +4020,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
 
 /// getNumOfConsecutiveZeros - Return the number of elements of a vector
 /// shuffle operation which come from a consecutively from a zero. The
-/// search can start in two diferent directions, from left or right.
+/// search can start in two different directions, from left or right.
 static
 unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
                                   bool ZerosFromLeft, SelectionDAG &DAG) {
@@ -5263,6 +5315,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
 
   // Break it into (shuffle shuffle_hi, shuffle_lo).
   Locs.clear();
+  Locs.resize(4);
   SmallVector<int,8> LoMask(4U, -1);
   SmallVector<int,8> HiMask(4U, -1);
 
@@ -5508,12 +5561,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
                               X86::getShuffleSHUFImmediate(SVOp), DAG);
 }
 
-static inline unsigned getUNPCKLOpcode(EVT VT) {
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
   switch(VT.getSimpleVT().SimpleTy) {
   case MVT::v4i32: return X86ISD::PUNPCKLDQ;
   case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
-  case MVT::v4f32: return X86ISD::UNPCKLPS;
-  case MVT::v2f64: return X86ISD::UNPCKLPD;
+  case MVT::v4f32:
+    return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+  case MVT::v2f64:
+    return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+  case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+  case MVT::v4f64: return X86ISD::VUNPCKLPDY;
   case MVT::v16i8: return X86ISD::PUNPCKLBW;
   case MVT::v8i16: return X86ISD::PUNPCKLWD;
   default:
@@ -5641,7 +5698,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   // unpckh_undef). Only use pshufd if speed is more important than size.
   if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
     if (VT != MVT::v2i64 && VT != MVT::v2f64)
-      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+      return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
   if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
     if (VT != MVT::v2i64 && VT != MVT::v2f64)
       return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5762,7 +5819,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   if (X86::isUNPCKLMask(SVOp))
-    return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+    return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+                                dl, VT, V1, V2, DAG);
 
   if (X86::isUNPCKHMask(SVOp))
     return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5789,7 +5847,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
 
     if (X86::isUNPCKLMask(NewSVOp))
-      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+      return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+                                  dl, VT, V2, V1, DAG);
 
     if (X86::isUNPCKHMask(NewSVOp))
       return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5812,8 +5871,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
 
   if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
       SVOp->getSplatIndex() == 0 && V2IsUndef) {
-    if (VT == MVT::v2f64)
-      return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+    if (VT == MVT::v2f64) {
+      X86ISD::NodeType Opcode =
+        getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+      return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+    }
     if (VT == MVT::v2i64)
       return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
   }
@@ -5840,7 +5902,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
 
   if (X86::isUNPCKL_v_undef_Mask(SVOp))
     if (VT != MVT::v2i64 && VT != MVT::v2f64)
-      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+      return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+                                  dl, VT, V1, V1, DAG);
   if (X86::isUNPCKH_v_undef_Mask(SVOp))
     if (VT != MVT::v2i64 && VT != MVT::v2f64)
       return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -7868,6 +7931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                            SelectionDAG &DAG) const {
   assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
          "This should be used only on Windows targets");
+  assert(!Subtarget->isTargetEnvMacho());
   DebugLoc dl = Op.getDebugLoc();
 
   // Get the inputs.
@@ -7878,8 +7942,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   SDValue Flag;
 
   EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+  unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
 
-  Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+  Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
   Flag = Chain.getValue(1);
 
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -8809,8 +8874,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SADDO:
     // A subtract of one will be selected as a INC. Note that INC doesn't
     // set CF, so we can't do this for UADDO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
-      if (C->getAPIntValue() == 1) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+      if (C->isOne()) {
         BaseOp = X86ISD::INC;
         Cond = X86::COND_O;
         break;
@@ -8825,8 +8890,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SSUBO:
     // A subtract of one will be selected as a DEC. Note that DEC doesn't
     // set CF, so we can't do this for USUBO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
-      if (C->getAPIntValue() == 1) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+      if (C->isOne()) {
         BaseOp = X86ISD::DEC;
         Cond = X86::COND_O;
         break;
@@ -10351,21 +10416,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
+  assert(!Subtarget->isTargetEnvMacho());
+
   // The lowering is pretty easy: we're just emitting the call to _alloca.  The
   // non-trivial part is impdef of ESP.
-  // FIXME: The code should be tweaked as soon as we'll try to do codegen for
-  // mingw-w64.
 
-  const char *StackProbeSymbol =
+  if (Subtarget->isTargetWin64()) {
+    if (Subtarget->isTargetCygMing()) {
+      // ___chkstk(Mingw64):
+      // Clobbers R10, R11, RAX and EFLAGS.
+      // Updates RSP.
+      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+        .addExternalSymbol("___chkstk")
+        .addReg(X86::RAX, RegState::Implicit)
+        .addReg(X86::RSP, RegState::Implicit)
+        .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+        .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+    } else {
+      // __chkstk(MSVCRT): does not update stack pointer.
+      // Clobbers R10, R11 and EFLAGS.
+      // FIXME: RAX(allocated size) might be reused and not killed.
+      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+        .addExternalSymbol("__chkstk")
+        .addReg(X86::RAX, RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+      // RAX has the offset to subtracted from RSP.
+      BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+        .addReg(X86::RSP)
+        .addReg(X86::RAX);
+    }
+  } else {
+    const char *StackProbeSymbol =
       Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
 
-  BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
-    .addExternalSymbol(StackProbeSymbol)
-    .addReg(X86::EAX, RegState::Implicit)
-    .addReg(X86::ESP, RegState::Implicit)
-    .addReg(X86::EAX, RegState::Define | RegState::Implicit)
-    .addReg(X86::ESP, RegState::Define | RegState::Implicit)
-    .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+    BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+      .addExternalSymbol(StackProbeSymbol)
+      .addReg(X86::EAX, RegState::Implicit)
+      .addReg(X86::ESP, RegState::Implicit)
+      .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+      .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+  }
 
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
@@ -12126,7 +12218,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
     AsmPieces.clear();
     SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
 
-    // FIXME: this should verify that we are targetting a 486 or better.  If not,
+    // FIXME: this should verify that we are targeting a 486 or better.  If not,
     // we will turn this bswap into something that will be lowered to logical ops
     // instead of emitting the bswap asm.  For now, we don't support 486 or lower
     // so don't worry about this.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6ec4a7d..6301057 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -677,9 +677,6 @@ namespace llvm {
     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;
 
-    unsigned getRegPressureLimit(const TargetRegisterClass *RC,
-                                 MachineFunction &MF) const;
-
     /// getStackCookieLocation - Return true if the target stores stack
     /// protector cookies at a fixed offset in some non-standard address
     /// space, and populates the address space and offset as
@@ -846,6 +843,12 @@ namespace llvm {
 
     virtual bool isUsedByReturnOnly(SDNode *N) const;
 
+    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+    virtual EVT
+    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+                             ISD::NodeType ExtendKind) const;
+
     virtual bool
       CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 45d1c6b..dd4f6a5 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
 //
 //===----------------------------------------------------------------------===//
 
-// FIXME: We don't support any intrinsics for these instructions yet.
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+      : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
 
-class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, 
-             list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+      : I3DNow<o, F, (outs VR64:$dst), ins,
+          !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+        Has3DNow0F0FOpcode {
+  // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src1 = $dst";
 }
 
-class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
-      : I<o, F, (outs VR64:$dst), ins,
-          !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
-          TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+      : I3DNow<o, F, (outs VR64:$dst), ins,
+          !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+        Has3DNow0F0FOpcode {
   // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
   let isAsmParserOnly = 1;
 }
 
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+  def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+  def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+  def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+    [(set VR64:$dst, (!cast<Intrinsic>(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+  def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+    [(set VR64:$dst, (!cast<Intrinsic>(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+        (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+  def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+  def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
 
-let Constraints = "$src1 = $dst" in {
-  // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
-  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
-  multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
-    def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
-    def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
-  }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+  def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+    [(set VR64:$dst, (!cast<Intrinsic>(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+  def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+    [(set VR64:$dst, (!cast<Intrinsic>(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn))
+        (bitconvert (load_mmx addr:$src))))]>;
 }
 
-defm PAVGUSB  : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID    : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC    : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD    : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ  : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE  : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT  : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX    : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN    : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL    : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP    : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT  : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB    : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR   : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD    : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW  : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB  : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID    : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC    : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD    : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ  : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE  : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT  : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX    : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN    : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL    : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP    : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT  : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB    : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR   : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
 
 
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
 
 def PREFETCH  : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
                        "prefetch $addr", []>;
-                       
+
 // FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
 def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
                        "prefetchw $addr", []>;
-}
 
 // "3DNowA" instructions
-defm PF2IW    : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW    : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC   : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC  : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD   : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW    : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW    : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC   : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC  : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD   : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f0ea068..9f7a4b0 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
 
 } // Defs = [EFLAGS]
 
-// Suprisingly enough, these are not two address instructions!
+// Surprisingly enough, these are not two address instructions!
 let Defs = [EFLAGS] in {
 // Register-Integer Signed Integer Multiply
 def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 77f4725..c228a0ae 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in
                            Requires<[IsWin64]>;
   }
 
+let isCall = 1, isCodeGenOnly = 1 in
+  // __chkstk(MSVC):     clobber R10, R11 and EFLAGS.
+  // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
+  let Defs = [RAX, R10, R11, RSP, EFLAGS],
+      Uses = [RSP] in {
+    def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+                      (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                      "call{q}\t$dst", []>,
+                    Requires<[IsWin64]>;
+  }
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
     isCodeGenOnly = 1 in
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 0660072..7daa264 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -91,21 +91,23 @@ class REX_W  { bit hasREX_WPrefix = 1; }
 class LOCK   { bit hasLockPrefix = 1; }
 class SegFS  { bits<2> SegOvrBits = 1; }
 class SegGS  { bits<2> SegOvrBits = 2; }
-class TB     { bits<4> Prefix = 1; }
-class REP    { bits<4> Prefix = 2; }
-class D8     { bits<4> Prefix = 3; }
-class D9     { bits<4> Prefix = 4; }
-class DA     { bits<4> Prefix = 5; }
-class DB     { bits<4> Prefix = 6; }
-class DC     { bits<4> Prefix = 7; }
-class DD     { bits<4> Prefix = 8; }
-class DE     { bits<4> Prefix = 9; }
-class DF     { bits<4> Prefix = 10; }
-class XD     { bits<4> Prefix = 11; }
-class XS     { bits<4> Prefix = 12; }
-class T8     { bits<4> Prefix = 13; }
-class TA     { bits<4> Prefix = 14; }
-class TF     { bits<4> Prefix = 15; }
+class TB     { bits<5> Prefix = 1; }
+class REP    { bits<5> Prefix = 2; }
+class D8     { bits<5> Prefix = 3; }
+class D9     { bits<5> Prefix = 4; }
+class DA     { bits<5> Prefix = 5; }
+class DB     { bits<5> Prefix = 6; }
+class DC     { bits<5> Prefix = 7; }
+class DD     { bits<5> Prefix = 8; }
+class DE     { bits<5> Prefix = 9; }
+class DF     { bits<5> Prefix = 10; }
+class XD     { bits<5> Prefix = 11; }
+class XS     { bits<5> Prefix = 12; }
+class T8     { bits<5> Prefix = 13; }
+class TA     { bits<5> Prefix = 14; }
+class A6     { bits<5> Prefix = 15; }
+class A7     { bits<5> Prefix = 16; }
+class TF     { bits<5> Prefix = 17; }
 class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -136,7 +138,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasOpSizePrefix = 0;  // Does this inst have a 0x66 prefix?
   bit hasAdSizePrefix = 0;  // Does this inst have a 0x67 prefix?
 
-  bits<4> Prefix = 0;       // Which prefix byte does this inst have?
+  bits<5> Prefix = 0;       // Which prefix byte does this inst have?
   bit hasREX_WPrefix  = 0;  // Does this inst require the REX.W prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
@@ -154,20 +156,20 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{5-0}   = FormBits;
   let TSFlags{6}     = hasOpSizePrefix;
   let TSFlags{7}     = hasAdSizePrefix;
-  let TSFlags{11-8}  = Prefix;
-  let TSFlags{12}    = hasREX_WPrefix;
-  let TSFlags{15-13} = ImmT.Value;
-  let TSFlags{18-16} = FPForm.Value;
-  let TSFlags{19}    = hasLockPrefix;
-  let TSFlags{21-20} = SegOvrBits;
-  let TSFlags{23-22} = ExeDomain.Value;
-  let TSFlags{31-24} = Opcode;
-  let TSFlags{32}    = hasVEXPrefix;
-  let TSFlags{33}    = hasVEX_WPrefix;
-  let TSFlags{34}    = hasVEX_4VPrefix;
-  let TSFlags{35}    = hasVEX_i8ImmReg;
-  let TSFlags{36}    = hasVEX_L;
-  let TSFlags{37}    = has3DNow0F0FOpcode;
+  let TSFlags{12-8}  = Prefix;
+  let TSFlags{13}    = hasREX_WPrefix;
+  let TSFlags{16-14} = ImmT.Value;
+  let TSFlags{19-17} = FPForm.Value;
+  let TSFlags{20}    = hasLockPrefix;
+  let TSFlags{22-21} = SegOvrBits;
+  let TSFlags{24-23} = ExeDomain.Value;
+  let TSFlags{32-25} = Opcode;
+  let TSFlags{33}    = hasVEXPrefix;
+  let TSFlags{34}    = hasVEX_WPrefix;
+  let TSFlags{35}    = hasVEX_4VPrefix;
+  let TSFlags{36}    = hasVEX_i8ImmReg;
+  let TSFlags{37}    = hasVEX_L;
+  let TSFlags{38}    = has3DNow0F0FOpcode;
 }
 
 class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -319,7 +321,7 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
         Requires<[HasAVX]>;
 class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
-      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
         Requires<[HasAVX]>;
 
 // SSE2 Instruction Templates:
@@ -353,7 +355,7 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
         Requires<[HasAVX]>;
 class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
-      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
         OpSize, Requires<[HasAVX]>;
 
 // SSE3 Instruction Templates:
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5016c0f..3cbfac1 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
 
 def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
 def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
 def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
 def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
 
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 76a9b12..83f0260 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
     RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
 
-    // If this is not a reversable operation (because there is a many->one)
+    // If this is not a reversible operation (because there is a many->one)
     // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
     if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
       continue;
@@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
     RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
 
-    // If this is not a reversable operation (because there is a many->one)
+    // If this is not a reversible operation (because there is a many->one)
     // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
     if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
       continue;
@@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
     RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
 
-    // If this is not a reversable operation (because there is a many->one)
+    // If this is not a reversible operation (because there is a many->one)
     // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
     if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
       continue;
@@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
     RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
 
-    // If this is not a reversable operation (because there is a many->one)
+    // If this is not a reversible operation (because there is a many->one)
     // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
     if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
       continue;
@@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
     case X86::MOVSDrm:
     case X86::MOVAPSrm:
     case X86::MOVUPSrm:
-    case X86::MOVUPSrm_Int:
     case X86::MOVAPDrm:
     case X86::MOVDQArm:
     case X86::MMX_MOVD64rm:
@@ -2241,6 +2240,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   bool isTwoAddr = NumOps > 1 &&
     MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
 
+  // FIXME: AsmPrinter doesn't know how to handle
+  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+  if (MI->getOpcode() == X86::ADD32ri &&
+      MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+    return NULL;
+
   MachineInstr *NewMI = NULL;
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
@@ -2535,6 +2540,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
     case X86::TEST32rr:
     case X86::TEST64rr:
       return true;
+    case X86::ADD32ri:
+      // FIXME: AsmPrinter doesn't know how to handle
+      // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+      if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+        return false;
+      break;
     }
   }
 
@@ -2845,11 +2856,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::FsMOVAPDrm:
   case X86::MOVAPSrm:
   case X86::MOVUPSrm:
-  case X86::MOVUPSrm_Int:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
-  case X86::MOVDQUrm_Int:
     break;
   }
   switch (Opc2) {
@@ -2869,11 +2878,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::FsMOVAPDrm:
   case X86::MOVAPSrm:
   case X86::MOVUPSrm:
-  case X86::MOVUPSrm_Int:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
-  case X86::MOVDQUrm_Int:
     break;
   }
 
@@ -3085,12 +3092,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   NopInst.setOpcode(X86::NOOP);
 }
 
-bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
-                      const MachineRegisterInfo *MRI,
-                      const MachineInstr *DefMI, unsigned DefIdx,
-                      const MachineInstr *UseMI, unsigned UseIdx) const {
-  switch (DefMI->getOpcode()) {
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+  switch (opc) {
   default: return false;
   case X86::DIVSDrm:
   case X86::DIVSDrm_Int:
@@ -3120,6 +3123,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
   }
 }
 
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineRegisterInfo *MRI,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
+  return isHighLatencyDef(DefMI->getOpcode());
+}
+
 namespace {
   /// CGBR - Create Global Base Reg pass. This initializes the PIC
   /// global base register for x86-32.
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index fcb5a25..8da68b5 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -33,15 +33,15 @@ namespace X86 {
     AddrScaleAmt = 1,
     AddrIndexReg = 2,
     AddrDisp = 3,
-    
+
     /// AddrSegmentReg - The operand # of the segment in the memory operand.
     AddrSegmentReg = 4,
 
     /// AddrNumOperands - Total number of operands in a memory reference.
     AddrNumOperands = 5
   };
-  
-  
+
+
   // X86 specific condition code. These correspond to X86_*_COND in
   // X86InstrInfo.td. They must be kept in synch.
   enum CondCode {
@@ -72,16 +72,16 @@ namespace X86 {
 
     COND_INVALID
   };
-    
+
   // Turn condition code into conditional branch opcode.
   unsigned GetCondBranchFromCond(CondCode CC);
-  
+
   /// GetOppositeBranchCondition - Return the inverse of the specified cond,
   /// e.g. turning COND_E to COND_NE.
   CondCode GetOppositeBranchCondition(X86::CondCode CC);
 
 }
-  
+
 /// X86II - This namespace holds all of the target specific flags that
 /// instruction info tracks.
 ///
@@ -90,14 +90,14 @@ namespace X86II {
   enum TOF {
     //===------------------------------------------------------------------===//
     // X86 Specific MachineOperand flags.
-    
+
     MO_NO_FLAG,
-    
+
     /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
     /// relocation of:
     ///    SYMBOL_LABEL + [. - PICBASELABEL]
     MO_GOT_ABSOLUTE_ADDRESS,
-    
+
     /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
     /// immediate should get the value of the symbol minus the PIC base label:
     ///    SYMBOL_LABEL - PICBASELABEL
@@ -106,77 +106,77 @@ namespace X86II {
     /// MO_GOT - On a symbol operand this indicates that the immediate is the
     /// offset to the GOT entry for the symbol name from the base of the GOT.
     ///
-    /// See the X86-64 ELF ABI supplement for more details. 
+    /// See the X86-64 ELF ABI supplement for more details.
     ///    SYMBOL_LABEL @GOT
     MO_GOT,
-    
+
     /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
-    /// the offset to the location of the symbol name from the base of the GOT. 
+    /// the offset to the location of the symbol name from the base of the GOT.
     ///
-    /// See the X86-64 ELF ABI supplement for more details. 
+    /// See the X86-64 ELF ABI supplement for more details.
     ///    SYMBOL_LABEL @GOTOFF
     MO_GOTOFF,
-    
+
     /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
     /// offset to the GOT entry for the symbol name from the current code
-    /// location. 
+    /// location.
     ///
-    /// See the X86-64 ELF ABI supplement for more details. 
+    /// See the X86-64 ELF ABI supplement for more details.
     ///    SYMBOL_LABEL @GOTPCREL
     MO_GOTPCREL,
-    
+
     /// MO_PLT - On a symbol operand this indicates that the immediate is
-    /// offset to the PLT entry of symbol name from the current code location. 
+    /// offset to the PLT entry of symbol name from the current code location.
     ///
-    /// See the X86-64 ELF ABI supplement for more details. 
+    /// See the X86-64 ELF ABI supplement for more details.
     ///    SYMBOL_LABEL @PLT
     MO_PLT,
-    
+
     /// MO_TLSGD - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
-    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    /// See 'ELF Handling for Thread-Local Storage' for more details.
     ///    SYMBOL_LABEL @TLSGD
     MO_TLSGD,
-    
+
     /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
-    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    /// See 'ELF Handling for Thread-Local Storage' for more details.
     ///    SYMBOL_LABEL @GOTTPOFF
     MO_GOTTPOFF,
-   
+
     /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
-    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    /// See 'ELF Handling for Thread-Local Storage' for more details.
     ///    SYMBOL_LABEL @INDNTPOFF
     MO_INDNTPOFF,
-    
+
     /// MO_TPOFF - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
-    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    /// See 'ELF Handling for Thread-Local Storage' for more details.
     ///    SYMBOL_LABEL @TPOFF
     MO_TPOFF,
-    
+
     /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
-    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    /// See 'ELF Handling for Thread-Local Storage' for more details.
     ///    SYMBOL_LABEL @NTPOFF
     MO_NTPOFF,
-    
+
     /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the "__imp_FOO" symbol.  This is used for
     /// dllimport linkage on windows.
     MO_DLLIMPORT,
-    
+
     /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the "FOO$stub" symbol.  This is used for calls
     /// and jumps to external functions on Tiger and earlier.
     MO_DARWIN_STUB,
-    
+
     /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
     /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
@@ -186,19 +186,19 @@ namespace X86II {
     /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
     /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
     MO_DARWIN_NONLAZY_PIC_BASE,
-    
+
     /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
     /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
     /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
     /// stub.
     MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-    
+
     /// MO_TLVP - On a symbol operand this indicates that the immediate is
     /// some TLS offset.
     ///
     /// This is the TLS offset for the Darwin TLS mechanism.
     MO_TLVP,
-    
+
     /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
     /// is some TLS offset from the picbase.
     ///
@@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
     return false;
   }
 }
- 
+
 /// X86II - This namespace holds all of the target specific flags that
 /// instruction info tracks.
 ///
@@ -299,7 +299,7 @@ namespace X86II {
     // MRMInitReg - This form is used for instructions whose source and
     // destinations are the same register.
     MRMInitReg = 32,
-    
+
     //// MRM_C1 - A mod/rm byte of exactly 0xC1.
     MRM_C1 = 33,
     MRM_C2 = 34,
@@ -318,7 +318,7 @@ namespace X86II {
     /// immediates, the first of which is a 16-bit immediate (specified by
     /// the imm encoding) and the second is a 8-bit fixed value.
     RawFrmImm8 = 43,
-    
+
     /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
     /// immediates, the first of which is a 16 or 32-bit immediate (specified by
     /// the imm encoding) and the second is a 16-bit fixed value.  In the AMD
@@ -347,7 +347,7 @@ namespace X86II {
     // set, there is no prefix byte for obtaining a multibyte opcode.
     //
     Op0Shift    = 8,
-    Op0Mask     = 0xF << Op0Shift,
+    Op0Mask     = 0x1F << Op0Shift,
 
     // TB - TwoByte - Set if this instruction has a two byte opcode, which
     // starts with a 0x0F byte before the real opcode.
@@ -368,11 +368,12 @@ namespace X86II {
     // floating point operations performed in the SSE registers.
     XD = 11 << Op0Shift,  XS = 12 << Op0Shift,
 
-    // T8, TA - Prefix after the 0x0F prefix.
+    // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
     T8 = 13 << Op0Shift,  TA = 14 << Op0Shift,
-    
+    A6 = 15 << Op0Shift,  A7 = 16 << Op0Shift,
+
     // TF - Prefix before and after 0x0F
-    TF = 15 << Op0Shift,
+    TF = 17 << Op0Shift,
 
     //===------------------------------------------------------------------===//
     // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -380,13 +381,13 @@ namespace X86II {
     // etc. We only cares about REX.W and REX.R bits and only the former is
     // statically determined.
     //
-    REXShift    = 12,
+    REXShift    = Op0Shift + 5,
     REX_W       = 1 << REXShift,
 
     //===------------------------------------------------------------------===//
     // This three-bit field describes the size of an immediate operand.  Zero is
     // unused so that we can tell if we forgot to set a value.
-    ImmShift = 13,
+    ImmShift = REXShift + 1,
     ImmMask    = 7 << ImmShift,
     Imm8       = 1 << ImmShift,
     Imm8PCRel  = 2 << ImmShift,
@@ -400,7 +401,7 @@ namespace X86II {
     // FP Instruction Classification...  Zero is non-fp instruction.
 
     // FPTypeMask - Mask for all of the FP types...
-    FPTypeShift = 16,
+    FPTypeShift = ImmShift + 3,
     FPTypeMask  = 7 << FPTypeShift,
 
     // NotFP - The default, set for instructions that do not use FP registers.
@@ -433,25 +434,26 @@ namespace X86II {
     SpecialFP  = 7 << FPTypeShift,
 
     // Lock prefix
-    LOCKShift = 19,
+    LOCKShift = FPTypeShift + 3,
     LOCK = 1 << LOCKShift,
 
     // Segment override prefixes. Currently we just need ability to address
     // stuff in gs and fs segments.
-    SegOvrShift = 20,
+    SegOvrShift = LOCKShift + 1,
     SegOvrMask  = 3 << SegOvrShift,
     FS          = 1 << SegOvrShift,
     GS          = 2 << SegOvrShift,
 
-    // Execution domain for SSE instructions in bits 22, 23.
-    // 0 in bits 22-23 means normal, non-SSE instruction.
-    SSEDomainShift = 22,
+    // Execution domain for SSE instructions in bits 23, 24.
+    // 0 in bits 23-24 means normal, non-SSE instruction.
+    SSEDomainShift = SegOvrShift + 2,
 
-    OpcodeShift   = 24,
-    OpcodeMask    = 0xFF << OpcodeShift,
+    OpcodeShift   = SSEDomainShift + 2,
+    OpcodeMask    = 0xFFULL << OpcodeShift,
 
     //===------------------------------------------------------------------===//
     /// VEX - The opcode prefix used by AVX instructions
+    VEXShift = OpcodeShift + 8,
     VEX         = 1U << 0,
 
     /// VEX_W - Has a opcode specific functionality, but is used in the same
@@ -473,7 +475,7 @@ namespace X86II {
     /// if a VR256 register is used, but some AVX instructions also have this
     /// field marked when using a f256 memory references.
     VEX_L       = 1U << 4,
-    
+
     /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
     /// wacky 0x0F 0x0F prefix for 3DNow! instructions.  The manual documents
     /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
@@ -482,18 +484,18 @@ namespace X86II {
     /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
     Has3DNow0F0FOpcode = 1U << 5
   };
-  
+
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
   static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
     return TSFlags >> X86II::OpcodeShift;
   }
-  
+
   static inline bool hasImm(uint64_t TSFlags) {
     return (TSFlags & X86II::ImmMask) != 0;
   }
-  
+
   /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
   /// of the specified instruction.
   static inline unsigned getSizeOfImm(uint64_t TSFlags) {
@@ -508,7 +510,7 @@ namespace X86II {
     case X86II::Imm64:      return 8;
     }
   }
-  
+
   /// isImmPCRel - Return true if the immediate of the specified instruction's
   /// TSFlags indicates that it is pc relative.
   static inline unsigned isImmPCRel(uint64_t TSFlags) {
@@ -525,7 +527,7 @@ namespace X86II {
       return false;
     }
   }
-  
+
   /// getMemoryOperandNo - The function returns the MCInst operand # for the
   /// first field of the memory operand.  If the instruction doesn't have a
   /// memory operand, this returns -1.
@@ -549,11 +551,11 @@ namespace X86II {
     case X86II::MRMDestMem:
       return 0;
     case X86II::MRMSrcMem: {
-      bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
+      bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
       unsigned FirstMemOp = 1;
       if (HasVEX_4V)
         ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
-      
+
       // FIXME: Maybe lea should have its own form?  This is a horrible hack.
       //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
       //    Opcode == X86::LEA16r || Opcode == X86::LEA32r)
@@ -613,7 +615,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
 class X86InstrInfo : public TargetInstrInfoImpl {
   X86TargetMachine &TM;
   const X86RegisterInfo RI;
-  
+
   /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
   ///
@@ -621,7 +623,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
   DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
   DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
   DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
-  
+
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
   DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
@@ -795,7 +797,7 @@ public:
   virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
                                       bool UnfoldLoad, bool UnfoldStore,
                                       unsigned *LoadRegIndex = 0) const;
-  
+
   /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
   /// to determine if two loads are loading from the same base address. It
   /// should only return true if the base pointers are the same and the
@@ -805,7 +807,7 @@ public:
                                        int64_t &Offset1, int64_t &Offset2) const;
 
   /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-  /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
   /// be scheduled togther. On some targets if two loads are loading from
   /// addresses in the same cache line, it's better if they are scheduled
   /// together. This function takes two integers that represent the load offsets
@@ -829,7 +831,7 @@ public:
     return (reg == X86::SPL || reg == X86::BPL ||
           reg == X86::SIL || reg == X86::DIL);
   }
-  
+
   static bool isX86_64ExtendedReg(const MachineOperand &MO) {
     if (!MO.isReg()) return false;
     return isX86_64ExtendedReg(MO.getReg());
@@ -858,11 +860,13 @@ public:
                                       const SmallVectorImpl<MachineOperand> &MOs,
                                       unsigned Size, unsigned Alignment) const;
 
+  bool isHighLatencyDef(int opc) const;
+
   bool hasHighOperandLatency(const InstrItineraryData *ItinData,
                              const MachineRegisterInfo *MRI,
                              const MachineInstr *DefMI, unsigned DefIdx,
                              const MachineInstr *UseMI, unsigned UseIdx) const;
-  
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f832a7c..03a0b0c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -459,7 +459,7 @@ def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
 include "X86InstrFormats.td"
 
 //===----------------------------------------------------------------------===//
-// Pattern fragments...
+// Pattern fragments.
 //
 
 // X86 specific condition code. These correspond to CondCode in
@@ -481,21 +481,21 @@ def X86_COND_O   : PatLeaf<(i8 13)>;
 def X86_COND_P   : PatLeaf<(i8 14)>; // alt. COND_PE
 def X86_COND_S   : PatLeaf<(i8 15)>;
 
-def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+  def i16immSExt8  : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+  def i32immSExt8  : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+  def i64immSExt8  : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
 
-def i16immSExt8  : PatLeaf<(i16 immSext8)>;
-def i32immSExt8  : PatLeaf<(i32 immSext8)>;
-def i64immSExt8  : PatLeaf<(i64 immSext8)>;
-def i64immSExt32  : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-def i64immZExt32  : PatLeaf<(i64 imm), [{
-  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-  // unsignedsign extended field.
-  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
 
-def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
-    uint64_t v = N->getZExtValue();
-    return v == (uint32_t)v && (int32_t)v == (int8_t)v;
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+  return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
 }]>;
 
 // Helper fragments for loads.
@@ -1437,7 +1437,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
 
 // Various unary fpstack operations default to operating on on ST1.
 // For example, "fxch" -> "fxch %st(1)"
-def : InstAlias<"faddp",        (ADD_FPrST0  ST1)>;
+def : InstAlias<"faddp",        (ADD_FPrST0  ST1), 0>;
 def : InstAlias<"fsubp",        (SUBR_FPrST0 ST1)>;
 def : InstAlias<"fsubrp",       (SUB_FPrST0  ST1)>;
 def : InstAlias<"fmulp",        (MUL_FPrST0  ST1)>;
@@ -1455,13 +1455,15 @@ def : InstAlias<"fucompi",      (UCOM_FIPr   ST1)>;
 // For example, "fadd %st(4), %st(0)" -> "fadd %st(4)".  We also disambiguate
 // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
 // gas.
-multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
- def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),    (Inst RST:$op)>;
- def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+                 (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+                 (Inst ST0), EmitAlias>;
 }
 
 defm : FpUnaryAlias<"fadd",   ADD_FST0r>;
-defm : FpUnaryAlias<"faddp",  ADD_FPrST0>;
+defm : FpUnaryAlias<"faddp",  ADD_FPrST0, 0>;
 defm : FpUnaryAlias<"fsub",   SUB_FST0r>;
 defm : FpUnaryAlias<"fsubp",  SUBR_FPrST0>;
 defm : FpUnaryAlias<"fsubr",  SUBR_FST0r>;
@@ -1472,8 +1474,8 @@ defm : FpUnaryAlias<"fdiv",   DIV_FST0r>;
 defm : FpUnaryAlias<"fdivp",  DIVR_FPrST0>;
 defm : FpUnaryAlias<"fdivr",  DIVR_FST0r>;
 defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
-defm : FpUnaryAlias<"fcomi",   COM_FIr>;
-defm : FpUnaryAlias<"fucomi",  UCOM_FIr>;
+defm : FpUnaryAlias<"fcomi",   COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi",  UCOM_FIr, 0>;
 defm : FpUnaryAlias<"fcompi",   COM_FIPr>;
 defm : FpUnaryAlias<"fucompi",  UCOM_FIPr>;
 
@@ -1481,7 +1483,7 @@ defm : FpUnaryAlias<"fucompi",  UCOM_FIPr>;
 // Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
 // commute.  We also allow fdiv[r]p/fsubrp even though they don't commute,
 // solely because gas supports it.
-def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
 def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
 def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
 def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
@@ -1534,29 +1536,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
 def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
 
 // Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+def : InstAlias<"movq $src, $dst",
+                (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+                (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
 
 // movsd with no operands (as opposed to the SSE scalar move of a double) is an
 // alias for movsl. (as in rep; movsd)
 def : InstAlias<"movsd", (MOVSD)>;
 
 // movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
 
 // movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
 // Note: No GR32->GR64 movzx form.
 
 // outb %dx -> outb %al, %dx
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b912949..cde3f6b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -135,18 +135,16 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
 // is used instead. Register-to-register movss/movsd is not modeled as an
 // INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
 // in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
-let isAsmParserOnly = 1 in {
-  def VMOVSSrr : sse12_move_rr<FR32, v4f32,
-                  "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
-  def VMOVSDrr : sse12_move_rr<FR64, v2f64,
-                  "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+                "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+                "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
 
-  let canFoldAsLoad = 1, isReMaterializable = 1 in {
-    def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+  def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
 
-    let AddedComplexity = 20 in
-      def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
-  }
+  let AddedComplexity = 20 in
+    def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;
 
-let isAsmParserOnly = 1 in {
 def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(store FR32:$src, addr:$dst)]>, XS, VEX;
 def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>, XD, VEX;
-}
 
 // Extract and store.
 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
                    [(set RC:$dst, (ld_frag addr:$src))], d>;
 }
 
-let isAsmParserOnly = 1 in {
 defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle>, VEX;
 defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                               "movups", SSEPackedSingle>, VEX;
 defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                               "movupd", SSEPackedDouble, 0>, OpSize, VEX;
-}
 defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle>, TB;
 defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
 defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                               "movupd", SSEPackedDouble, 0>, TB, OpSize;
 
-let isAsmParserOnly = 1 in {
 def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
@@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
 def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
-}
 
 def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
 def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
@@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    [(store (v2f64 VR128:$src), addr:$dst)]>;
 
 // Intrinsic forms of MOVUPS/D load and store
-let isAsmParserOnly = 1 in {
-  let canFoldAsLoad = 1, isReMaterializable = 1 in
-  def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
-             (ins f128mem:$src),
-             "movups\t{$src, $dst|$dst, $src}",
-             [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
-  def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
-             (ins f128mem:$src),
-             "movupd\t{$src, $dst|$dst, $src}",
-             [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
-  def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
-             (ins f128mem:$dst, VR128:$src),
-             "movups\t{$src, $dst|$dst, $src}",
-             [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
-  def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
-             (ins f128mem:$dst, VR128:$src),
-             "movupd\t{$src, $dst|$dst, $src}",
-             [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movups\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "movupd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+           (ins f128mem:$dst, VR128:$src),
+           "movups\t{$src, $dst|$dst, $src}",
+           [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+           (ins f128mem:$dst, VR128:$src),
+           "movupd\t{$src, $dst|$dst, $src}",
+           [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
 
 def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                        "movups\t{$src, $dst|$dst, $src}",
@@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
               SSEPackedDouble>, TB, OpSize;
 }
 
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
   defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
   defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
@@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
                                    "\t{$src2, $dst|$dst, $src2}">;
 }
 
-let isAsmParserOnly = 1 in {
 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
                                  (iPTR 0))), addr:$dst)]>, VEX;
-}
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
 
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
-let isAsmParserOnly = 1 in {
 def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
@@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                                  (v2f64 (unpckh VR128:$src, (undef))),
                                  (iPTR 0))), addr:$dst)]>,
                    VEX;
-}
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
@@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                                  (v2f64 (unpckh VR128:$src, (undef))),
                                  (iPTR 0))), addr:$dst)]>;
 
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
   def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                        (ins VR128:$src1, VR128:$src2),
                       "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
 }
 
-let isAsmParserOnly = 1 in {
 defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                 "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -542,7 +511,6 @@ defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
                                   VEX_4V;
 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
                                   VEX_4V, VEX_W;
-}
 
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                       "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
@@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
               [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
 }
 
-let isAsmParserOnly = 1 in {
-  defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
-                        f32mem, load, "cvtss2si">, XS, VEX;
-  defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
-                          int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
-                          XS, VEX, VEX_W;
-  defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                        f128mem, load, "cvtsd2si">, XD, VEX;
-  defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
-                        int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
-                        XD, VEX, VEX_W;
-
-  // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
-  // Get rid of this hack or rename the intrinsics, there are several
-  // intructions that only match with the intrinsic form, why create duplicates
-  // to let them be recognized by the assembler?
-  defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
-                        "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-  defm VCVTSD2SI64   : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
-                        "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-}
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                      f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+                        int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+                        XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                      f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+                      int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+                      XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// Get rid of this hack or rename the intrinsics, there are several
+// intructions that only match with the intrinsic form, why create duplicates
+// to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64   : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
 defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                       f32mem, load, "cvtss2si">, XS;
 defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
@@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                   f128mem, load, "cvtsd2si{q}">, XD, REX_W;
 
 
-let isAsmParserOnly = 1 in {
-  defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
-  defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
-            VEX_W;
-  defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
-  defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
-            VEX_4V, VEX_W;
-}
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+          VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+          VEX_4V, VEX_W;
 
 let Constraints = "$src1 = $dst" in {
   defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in {
 /// SSE 1 Only
 
 // Aliases for intrinsics
-let isAsmParserOnly = 1 in {
 defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     f32mem, load, "cvttss2si">, XS, VEX;
 defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
 defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse2_cvttsd2si64, f128mem, load,
                                     "cvttsd2si">, XD, VEX, VEX_W;
-}
 defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     f32mem, load, "cvttss2si">, XS;
 defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse2_cvttsd2si64, f128mem, load,
                                     "cvttsd2si{q}">, XD, REX_W;
 
-let isAsmParserOnly = 1, Pattern = []<dag> in {
+let Pattern = []<dag> in {
 defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
                                "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
 defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
@@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
 /// SSE 2 Only
 
 // Convert scalar double to scalar single
-let isAsmParserOnly = 1 in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -711,7 +672,6 @@ def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                        (ins FR64:$src1, f64mem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
-}
 def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
         Requires<[HasAVX]>;
 
@@ -723,7 +683,6 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                       [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
                   Requires<[HasSSE2, OptForSize]>;
 
-let isAsmParserOnly = 1 in
 defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
                       int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
                       XS, VEX_4V;
@@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
                       int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
 
 // Convert scalar single to scalar double
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                     (ins FR32:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     (ins FR32:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-}
 def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
         Requires<[HasAVX]>;
 
@@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
                  Requires<[HasSSE2, OptForSize]>;
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                     [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                        (load addr:$src2)))]>, XS, VEX_4V,
                     Requires<[HasAVX]>;
-}
 let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
 def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src),
       Requires<[HasSSE2, OptForSpeed]>;
 
 // Convert doubleword to packed single/double fp
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+// SSE2 instructions without OpSize prefix
 def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      TB, VEX, Requires<[HasAVX]>;
-}
 def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      TB, Requires<[HasSSE2]>;
 
 // FIXME: why the non-intrinsic version is described as SSE3?
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
 def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      XS, VEX, Requires<[HasAVX]>;
-}
 def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
 
 
 // Convert packed single/double fp to doubleword
-let isAsmParserOnly = 1 in {
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
@@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
                          "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                             (memop addr:$src)))]>, VEX;
-}
 def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                             (memop addr:$src)))]>;
 
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+// SSE2 packed instructions with XD prefix
 def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
                                           (memop addr:$src)))]>,
                      XD, VEX, Requires<[HasAVX]>;
-}
 def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
 
 
 // Convert with truncation packed single/double fp to doubleword
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+// SSE2 packed instructions with XS prefix
 def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                             (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
 
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vcvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
@@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                            (memop addr:$src)))]>,
                       XS, VEX, Requires<[HasAVX]>;
-}
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
                             (ins VR128:$src),
                           "cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
                           "cvttpd2dq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (memop addr:$src)))]>, VEX;
-}
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memop addr:$src)))]>;
 
-let isAsmParserOnly = 1 in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
 // Provide other assembly rr and rm forms to address this explicitly.
@@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
 
 // Convert packed single to packed double
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
                   // SSE2 instructions without OpSize prefix
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                           (load addr:$src)))]>,
                      VEX, Requires<[HasAVX]>;
-}
 def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                      TB, Requires<[HasSSE2]>;
 
 // Convert packed double to packed single
-let isAsmParserOnly = 1 in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
 // Provide other assembly rr and rm forms to address this explicitly.
@@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
 
 
-let isAsmParserOnly = 1 in {
 def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                             (memop addr:$src)))]>;
-}
 def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1089,26 +1025,27 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                             string asm, string asm_alt> {
-  def rr : SIi8<0xC2, MRMSrcReg,
-                    (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
-                    asm, []>;
-  let mayLoad = 1 in
-  def rm : SIi8<0xC2, MRMSrcMem,
-                    (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
-                    asm, []>;
-  // Accept explicit immediate argument form instead of comparison code.
   let isAsmParserOnly = 1 in {
-    def rr_alt : SIi8<0xC2, MRMSrcReg,
-                  (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
-                  asm_alt, []>;
+    def rr : SIi8<0xC2, MRMSrcReg,
+                  (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+                  asm, []>;
     let mayLoad = 1 in
-    def rm_alt : SIi8<0xC2, MRMSrcMem,
-                  (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
-                  asm_alt, []>;
+    def rm : SIi8<0xC2, MRMSrcMem,
+                  (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+                  asm, []>;
   }
+
+  // Accept explicit immediate argument form instead of comparison code.
+  def rr_alt : SIi8<0xC2, MRMSrcReg,
+                (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+                asm_alt, []>;
+  let mayLoad = 1 in
+  def rm_alt : SIi8<0xC2, MRMSrcMem,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+                asm_alt, []>;
 }
 
-let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+let neverHasSideEffects = 1 in {
   defm VCMPSS  : sse12_cmp_scalar<FR32, f32mem,
                   "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                   "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
@@ -1141,14 +1078,12 @@ multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
 }
 
 // Aliases to match intrinsics which expect XMM operand(s).
-let isAsmParserOnly = 1 in {
-  defm Int_VCMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
-                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
-                       XS, VEX_4V;
-  defm Int_VCMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
-                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
-                       XD, VEX_4V;
-}
+defm Int_VCMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+                     "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+                     XS, VEX_4V;
+defm Int_VCMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+                     "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+                     XD, VEX_4V;
 let Constraints = "$src1 = $dst" in {
   defm Int_CMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
@@ -1171,28 +1106,26 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
 }
 
 let Defs = [EFLAGS] in {
-  let isAsmParserOnly = 1 in {
-    defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
-                                    "ucomiss", SSEPackedSingle>, VEX;
-    defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
-                                    "ucomisd", SSEPackedDouble>, OpSize, VEX;
-    let Pattern = []<dag> in {
-      defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
-                                      "comiss", SSEPackedSingle>, VEX;
-      defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
-                                      "comisd", SSEPackedDouble>, OpSize, VEX;
-    }
-
-    defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                              load, "ucomiss", SSEPackedSingle>, VEX;
-    defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                              load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
-
-    defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
-                              load, "comiss", SSEPackedSingle>, VEX;
-    defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
-                              load, "comisd", SSEPackedDouble>, OpSize, VEX;
+  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+                                  "ucomiss", SSEPackedSingle>, VEX;
+  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+                                  "ucomisd", SSEPackedDouble>, OpSize, VEX;
+  let Pattern = []<dag> in {
+    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+                                    "comiss", SSEPackedSingle>, VEX;
+    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+                                    "comisd", SSEPackedDouble>, OpSize, VEX;
   }
+
+  defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+                            load, "ucomiss", SSEPackedSingle>, VEX;
+  defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+                            load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+  defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+                            load, "comiss", SSEPackedSingle>, VEX;
+  defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+                            load, "comisd", SSEPackedDouble>, OpSize, VEX;
   defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                   "ucomiss", SSEPackedSingle>, TB;
   defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1220,41 +1153,40 @@ let Defs = [EFLAGS] in {
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                             Intrinsic Int, string asm, string asm_alt,
                             Domain d> {
-  def rri : PIi8<0xC2, MRMSrcReg,
-             (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
-             [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
-  def rmi : PIi8<0xC2, MRMSrcMem,
-             (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
-             [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
-  // Accept explicit immediate argument form instead of comparison code.
   let isAsmParserOnly = 1 in {
-    def rri_alt : PIi8<0xC2, MRMSrcReg,
-               (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
-               asm_alt, [], d>;
-    def rmi_alt : PIi8<0xC2, MRMSrcMem,
-               (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
-               asm_alt, [], d>;
+    def rri : PIi8<0xC2, MRMSrcReg,
+               (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+               [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+    def rmi : PIi8<0xC2, MRMSrcMem,
+               (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+               [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
   }
-}
 
-let isAsmParserOnly = 1 in {
-  defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
-                 "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
-                 "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                 SSEPackedSingle>, VEX_4V;
-  defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
-                 "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                 "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                 SSEPackedDouble>, OpSize, VEX_4V;
-  defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
-                 "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
-                 "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                 SSEPackedSingle>, VEX_4V;
-  defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
-                 "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                 "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
-                 SSEPackedDouble>, OpSize, VEX_4V;
-}
+  // Accept explicit immediate argument form instead of comparison code.
+  def rri_alt : PIi8<0xC2, MRMSrcReg,
+             (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+             asm_alt, [], d>;
+  def rmi_alt : PIi8<0xC2, MRMSrcMem,
+             (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+             asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+               "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+               "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+               SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+               "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+               "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+               SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+               "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+               "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+               SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+               "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+               "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+               SSEPackedDouble>, OpSize, VEX_4V;
 let Constraints = "$src1 = $dst" in {
   defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
                  "cmp${cc}ps\t{$src, $dst|$dst, $src}",
@@ -1294,20 +1226,18 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                             (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
 }
 
-let isAsmParserOnly = 1 in {
-  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
-             "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-             memopv4f32, SSEPackedSingle>, VEX_4V;
-  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
-             "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-             memopv8f32, SSEPackedSingle>, VEX_4V;
-  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
-             "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
-             memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
-  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
-             "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
-             memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
-}
+defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
+           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+           memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+           memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+           memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
 
 let Constraints = "$src1 = $dst" in {
   defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -1340,33 +1270,31 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
 }
 
 let AddedComplexity = 10 in {
-  let isAsmParserOnly = 1 in {
-    defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
-          VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedSingle>, VEX_4V;
-    defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
-          VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedDouble>, OpSize, VEX_4V;
-    defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
-          VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedSingle>, VEX_4V;
-    defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
-          VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedDouble>, OpSize, VEX_4V;
-
-    defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
-          VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedSingle>, VEX_4V;
-    defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
-          VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedDouble>, OpSize, VEX_4V;
-    defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
-          VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedSingle>, VEX_4V;
-    defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
-          VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         SSEPackedDouble>, OpSize, VEX_4V;
-  }
+  defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+        VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedSingle>, VEX_4V;
+  defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+        VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedDouble>, OpSize, VEX_4V;
+  defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+        VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedSingle>, VEX_4V;
+  defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+        VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedDouble>, OpSize, VEX_4V;
+
+  defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+        VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedSingle>, VEX_4V;
+  defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+        VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedDouble>, OpSize, VEX_4V;
+  defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+        VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedSingle>, VEX_4V;
+  defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+        VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       SSEPackedDouble>, OpSize, VEX_4V;
 
   let Constraints = "$src1 = $dst" in {
     defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1404,30 +1332,28 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
 defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
                                      SSEPackedDouble>, TB, OpSize;
 
-let isAsmParserOnly = 1 in {
-  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
-                                        "movmskps", SSEPackedSingle>, VEX;
-  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
-                                        "movmskpd", SSEPackedDouble>, OpSize,
-                                        VEX;
-  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
-                                        "movmskps", SSEPackedSingle>, VEX;
-  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
-                                        "movmskpd", SSEPackedDouble>, OpSize,
-                                        VEX;
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+                                      "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+                                      "movmskpd", SSEPackedDouble>, OpSize,
+                                      VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+                                      "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+                                      "movmskpd", SSEPackedDouble>, OpSize,
+                                      VEX;
 
-  // Assembler Only
-  def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-  def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
-             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
-             VEX;
-  def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
-             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
-  def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
-             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
-             VEX;
-}
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+           "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+           "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+           VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+           "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+           "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+           VEX;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
@@ -1482,13 +1408,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
 ///
 multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
                                        SDNode OpNode> {
-  let isAsmParserOnly = 1 in {
-    defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
-                FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
 
-    defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
-          FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
-  }
+  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
 
   let Constraints = "$src1 = $dst" in {
     defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1514,7 +1438,7 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
 multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, int HasPat = 0,
                                  list<list<dag>> Pattern = []> {
-  let isAsmParserOnly = 1, Pattern = []<dag> in {
+  let Pattern = []<dag> in {
     defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f128mem,
          !if(HasPat, Pattern[0], // rr
@@ -1561,7 +1485,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
 
 /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
 ///
-let isAsmParserOnly = 1 in {
 multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
     defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
           !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
@@ -1569,7 +1492,6 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
     defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
           !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
 }
-}
 
 // AVX 256-bit packed logical ops forms
 defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
@@ -1667,38 +1589,36 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
 }
 
 // Binary Arithmetic instructions
-let isAsmParserOnly = 1 in {
-  defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
-              basic_sse12_fp_binop_s_int<0x58, "add", 0>,
-              basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
-              basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
-  defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
-              basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
-              basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
-              basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+            basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+            basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+            basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+            basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+            basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+            basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
 
-  let isCommutable = 0 in {
-    defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
-                basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
-                basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
-                basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
-    defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
-                basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
-                basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
-                basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
-    defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
-                basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
-                basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
-                basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
-                basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
-                basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
-    defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
-                basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
-                basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
-                basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
-                basic_sse12_fp_binop_p_y_int<0x5D, "min">,
-                basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
-  }
+let isCommutable = 0 in {
+  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+              basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+              basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+              basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+              basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+              basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+              basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+              basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+              basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+              basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+              basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+              basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+              basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -1899,7 +1819,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   // Square root.
   defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
                 sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
@@ -1955,67 +1875,65 @@ defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
 // SSE 1 & 2 - Non-temporal stores
 //===----------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1 in {
-  def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
-                         (ins i128mem:$dst, VR128:$src),
-                         "movntps\t{$src, $dst|$dst, $src}",
-                         [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
-  def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
-                         (ins i128mem:$dst, VR128:$src),
-                         "movntpd\t{$src, $dst|$dst, $src}",
-                         [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+                       (ins i128mem:$dst, VR128:$src),
+                       "movntps\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+                       (ins i128mem:$dst, VR128:$src),
+                       "movntpd\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
 
-  let ExeDomain = SSEPackedInt in
-    def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+let ExeDomain = SSEPackedInt in
+  def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+                     (ins f128mem:$dst, VR128:$src),
+                     "movntdq\t{$src, $dst|$dst, $src}",
+                     [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+  def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+                       (ins f128mem:$dst, VR128:$src),
+                       "movntps\t{$src, $dst|$dst, $src}",
+                       [(alignednontemporalstore (v4f32 VR128:$src),
+                                                 addr:$dst)]>, VEX;
+  def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                        (ins f128mem:$dst, VR128:$src),
-                       "movntdq\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
-  let AddedComplexity = 400 in { // Prefer non-temporal versions
-    def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
-                         (ins f128mem:$dst, VR128:$src),
-                         "movntps\t{$src, $dst|$dst, $src}",
-                         [(alignednontemporalstore (v4f32 VR128:$src),
-                                                   addr:$dst)]>, VEX;
-    def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
-                         (ins f128mem:$dst, VR128:$src),
-                         "movntpd\t{$src, $dst|$dst, $src}",
-                         [(alignednontemporalstore (v2f64 VR128:$src),
-                                                   addr:$dst)]>, VEX;
-    def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
-                          (ins f128mem:$dst, VR128:$src),
-                          "movntdq\t{$src, $dst|$dst, $src}",
-                          [(alignednontemporalstore (v2f64 VR128:$src),
-                                                    addr:$dst)]>, VEX;
-    let ExeDomain = SSEPackedInt in
-    def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+                       "movntpd\t{$src, $dst|$dst, $src}",
+                       [(alignednontemporalstore (v2f64 VR128:$src),
+                                                 addr:$dst)]>, VEX;
+  def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
                         (ins f128mem:$dst, VR128:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
-                        [(alignednontemporalstore (v4f32 VR128:$src),
+                        [(alignednontemporalstore (v2f64 VR128:$src),
                                                   addr:$dst)]>, VEX;
-
-    def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
-                         (ins f256mem:$dst, VR256:$src),
-                         "movntps\t{$src, $dst|$dst, $src}",
-                         [(alignednontemporalstore (v8f32 VR256:$src),
-                                                   addr:$dst)]>, VEX;
-    def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
-                         (ins f256mem:$dst, VR256:$src),
-                         "movntpd\t{$src, $dst|$dst, $src}",
-                         [(alignednontemporalstore (v4f64 VR256:$src),
-                                                   addr:$dst)]>, VEX;
-    def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
-                          (ins f256mem:$dst, VR256:$src),
-                          "movntdq\t{$src, $dst|$dst, $src}",
-                          [(alignednontemporalstore (v4f64 VR256:$src),
-                                                    addr:$dst)]>, VEX;
-    let ExeDomain = SSEPackedInt in
-    def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+  let ExeDomain = SSEPackedInt in
+  def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+                      (ins f128mem:$dst, VR128:$src),
+                      "movntdq\t{$src, $dst|$dst, $src}",
+                      [(alignednontemporalstore (v4f32 VR128:$src),
+                                                addr:$dst)]>, VEX;
+
+  def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+                       (ins f256mem:$dst, VR256:$src),
+                       "movntps\t{$src, $dst|$dst, $src}",
+                       [(alignednontemporalstore (v8f32 VR256:$src),
+                                                 addr:$dst)]>, VEX;
+  def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+                       (ins f256mem:$dst, VR256:$src),
+                       "movntpd\t{$src, $dst|$dst, $src}",
+                       [(alignednontemporalstore (v4f64 VR256:$src),
+                                                 addr:$dst)]>, VEX;
+  def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
                         (ins f256mem:$dst, VR256:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
-                        [(alignednontemporalstore (v8f32 VR256:$src),
+                        [(alignednontemporalstore (v4f64 VR256:$src),
                                                   addr:$dst)]>, VEX;
-  }
+  let ExeDomain = SSEPackedInt in
+  def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+                      (ins f256mem:$dst, VR256:$src),
+                      "movntdq\t{$src, $dst|$dst, $src}",
+                      [(alignednontemporalstore (v8f32 VR256:$src),
+                                                addr:$dst)]>, VEX;
 }
 
 def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -2138,12 +2056,10 @@ def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
 // SSE 1 & 2 - Load/Store XCSR register
 //===----------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1 in {
-  def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
-  def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
-}
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
 
 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                   "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
@@ -2156,45 +2072,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
 
 let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
-let isAsmParserOnly = 1 in {
-  let neverHasSideEffects = 1 in {
-  def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  }
-  def VMOVDQUrr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-  def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
-                      "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-
-  let canFoldAsLoad = 1, mayLoad = 1 in {
-  def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                     "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
-                     "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  let Predicates = [HasAVX] in {
-    def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                      "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
-    def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
-                      "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
-  }
-  }
+let neverHasSideEffects = 1 in {
+def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
 
-  let mayStore = 1 in {
-  def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
-                       (ins i128mem:$dst, VR128:$src),
-                       "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
-                       (ins i256mem:$dst, VR256:$src),
-                       "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-  let Predicates = [HasAVX] in {
-  def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                   "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+                   "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+  def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
-  def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+  def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
-  }
-  }
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
+                     (ins i128mem:$dst, VR128:$src),
+                     "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+                     (ins i256mem:$dst, VR256:$src),
+                     "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                  "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+                  "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
 }
 
 let neverHasSideEffects = 1 in
@@ -2226,23 +2140,11 @@ def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
 }
 
 // Intrinsic forms of MOVDQU load and store
-let isAsmParserOnly = 1 in {
-let canFoldAsLoad = 1 in
-def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                       "vmovdqu\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
-                     XS, VEX, Requires<[HasAVX]>;
 def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                        "vmovdqu\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                      XS, VEX, Requires<[HasAVX]>;
-}
 
-let canFoldAsLoad = 1 in
-def MOVDQUrm_Int :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                       "movdqu\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
-                 XS, Requires<[HasSSE2]>;
 def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
@@ -2347,7 +2249,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 // 128-bit Integer Arithmetic
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
 defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
 defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
@@ -2437,7 +2339,7 @@ defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
 // SSE2 - Packed Integer Logical Instructions
 //===---------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
                                 int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
                                 VEX_4V;
@@ -2584,7 +2486,7 @@ let Predicates = [HasSSE2] in {
 // SSE2 - Packed Integer Comparison Instructions
 //===---------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VPCMPEQB  : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
                                     0>, VEX_4V;
   defm VPCMPEQW  : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
@@ -2638,7 +2540,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
 // SSE2 - Packed Integer Pack Instructions
 //===---------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
                                   0, 0>, VEX_4V;
 defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
@@ -2676,7 +2578,7 @@ def mi : Ii8<0x70, MRMSrcMem,
 }
 } // ExeDomain = SSEPackedInt
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   let AddedComplexity = 5 in
   defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
                                VEX;
@@ -2724,7 +2626,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                                                addr:$src2))))]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
                                  0>, VEX_4V;
   defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
@@ -2834,7 +2736,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
 }
 
 // Extract
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
 def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2847,7 +2749,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                                                 imm:$src2))]>;
 
 // Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
   def  VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
        (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2866,13 +2768,11 @@ let Constraints = "$src1 = $dst" in
 
 let ExeDomain = SSEPackedInt in {
 
-let isAsmParserOnly = 1 in {
 def VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
            "pmovmskb\t{$src, $dst|$dst, $src}",
            [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
 def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
            "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
-}
 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
            "pmovmskb\t{$src, $dst|$dst, $src}",
            [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2885,7 +2785,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
 
 let ExeDomain = SSEPackedInt in {
 
-let isAsmParserOnly = 1 in {
 let Uses = [EDI] in
 def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
            (ins VR128:$src, VR128:$mask),
@@ -2896,7 +2795,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
            (ins VR128:$src, VR128:$mask),
            "maskmovdqu\t{$mask, $src|$src, $mask}",
            [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
-}
 
 let Uses = [EDI] in
 def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -2914,7 +2812,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
 //===---------------------------------------------------------------------===//
 
 // Move Int Doubleword to Packed Double Int
-let isAsmParserOnly = 1 in {
 def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2924,7 +2821,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                       VEX;
-}
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2943,7 +2839,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
 
 
 // Move Int Doubleword to Single Scalar
-let isAsmParserOnly = 1 in {
 def VMOVDI2SSrr  : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2952,7 +2847,6 @@ def VMOVDI2SSrm  : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
                       VEX;
-}
 def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2962,7 +2856,6 @@ def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
 
 // Move Packed Doubleword Int to Packed Double Int
-let isAsmParserOnly = 1 in {
 def VMOVPDI2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2972,7 +2865,6 @@ def VMOVPDI2DImr  : VPDI<0x7E, MRMDestMem, (outs),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>, VEX;
-}
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2998,14 +2890,12 @@ def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
 
 // Move Scalar Single to Double Int
-let isAsmParserOnly = 1 in {
 def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
 def VMOVSS2DImr  : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
-}
 def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -3014,7 +2904,7 @@ def MOVSS2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
 
 // movd / movq to XMM register zero-extends
-let AddedComplexity = 15, isAsmParserOnly = 1 in {
+let AddedComplexity = 15 in {
 def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v4i32 (X86vzmovl
@@ -3038,7 +2928,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
 }
 
 let AddedComplexity = 20 in {
-let isAsmParserOnly = 1 in
 def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -3064,7 +2953,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
 //===---------------------------------------------------------------------===//
 
 // Move Quadword Int to Packed Quadword Int
-let isAsmParserOnly = 1 in
 def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
@@ -3077,7 +2965,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
 
 // Move Packed Quadword Int to Quadword Int
-let isAsmParserOnly = 1 in
 def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3091,7 +2978,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
           (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
 
 // Store / copy lower 64-bits of a XMM register.
-let isAsmParserOnly = 1 in
 def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3099,7 +2985,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
 
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
 def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
@@ -3124,7 +3010,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
 
 // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
 // IA32 document. movq xmm1, xmm2 does clear the high bits.
-let isAsmParserOnly = 1, AddedComplexity = 15 in
+let AddedComplexity = 15 in
 def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
@@ -3135,7 +3021,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, Requires<[HasSSE2]>;
 
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
 def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2i64 (X86vzmovl
@@ -3153,7 +3039,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
 }
 
 // Instructions to match in the assembler
-let isAsmParserOnly = 1 in {
 def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
 def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
@@ -3161,13 +3046,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
 // Recognize "movd" with GR64 destination, but encode as a "movq"
 def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
-}
 
 // Instructions for the disassembler
 // xr = XMM register
 // xm = mem64
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
 def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3209,7 +3093,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 //===---------------------------------------------------------------------===//
 
 // Convert Packed Double FP to Packed DW Integers
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
 // Provide other assembly rr and rm forms to address this explicitly.
@@ -3237,7 +3121,7 @@ def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
 
 // Convert Packed DW Integers to Packed Double FP
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3288,7 +3172,7 @@ def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   // FIXME: Merge above classes when we have patterns for the ymm version
   defm VMOVSHDUP  : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
   defm VMOVSLDUP  : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
@@ -3319,7 +3203,7 @@ def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     []>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   // FIXME: Merge above classes when we have patterns for the ymm version
   defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
   defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
@@ -3327,7 +3211,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
 defm MOVDDUP : sse3_replicate_dfp<"movddup">;
 
 // Move Unaligned Integer
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "vlddqu\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
@@ -3391,21 +3275,21 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
        [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
   ExeDomain = SSEPackedDouble in {
   defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
-                               f128mem, 0>, XD, VEX_4V;
+                               f128mem, 0>, TB, XD, VEX_4V;
   defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
-                               f128mem, 0>, OpSize, VEX_4V;
+                               f128mem, 0>, TB, OpSize, VEX_4V;
   defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
-                               f256mem, 0>, XD, VEX_4V;
+                               f256mem, 0>, TB, XD, VEX_4V;
   defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
-                               f256mem, 0>, OpSize, VEX_4V;
+                               f256mem, 0>, TB, OpSize, VEX_4V;
 }
 let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
     ExeDomain = SSEPackedDouble in {
   defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
-                              f128mem>, XD;
+                              f128mem>, TB, XD;
   defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
                               f128mem>, TB, OpSize;
 }
@@ -3444,7 +3328,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
       [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                           int_x86_sse3_hadd_ps, 0>, VEX_4V;
   defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -3496,7 +3380,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                        (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
                                   int_x86_ssse3_pabs_b_128>, VEX;
   defm VPABSW  : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
@@ -3538,7 +3422,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
           (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 let isCommutable = 0 in {
   defm VPHADDW    : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
                                       int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3630,7 +3514,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
       []>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
   defm PALIGN : ssse3_palign<"palignr">;
@@ -3985,7 +3869,7 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
        OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
                                      VEX;
 defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
@@ -4051,7 +3935,7 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
           OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
                                      VEX;
 defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
@@ -4092,7 +3976,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
                  OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
                                      VEX;
 defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
@@ -4134,7 +4018,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
 // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
   def  VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
          (ins VR128:$src1, i32i8imm:$src2),
@@ -4156,7 +4040,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
 // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
 
 defm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
@@ -4178,7 +4062,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
                           addr:$dst)]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
 
 defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
@@ -4199,7 +4083,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
                           addr:$dst)]>, OpSize, REX_W;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
 
 defm PEXTRQ      : SS41I_extract64<0x16, "pextrq">;
@@ -4222,7 +4106,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
                           addr:$dst)]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
   def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
@@ -4262,7 +4146,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
                    imm:$src3))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
   defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;
@@ -4288,7 +4172,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
                           imm:$src3)))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
   defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -4314,7 +4198,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
                           imm:$src3)))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
 let Constraints = "$src1 = $dst" in
   defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -4347,7 +4231,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
 
 let Constraints = "$src1 = $dst" in
   defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
 
 def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
@@ -4517,7 +4401,7 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
 }
 
 // FP round - roundss, roundps, roundsd, roundpd
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   // Intrinsic form
   defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
                                   memopv4f32, memopv2f64,
@@ -4552,7 +4436,7 @@ defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
 
 // ptest instruction we'll lower to this in X86ISelLowering primarily from
 // the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
 def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
@@ -4595,7 +4479,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
             OpSize, VEX;
 }
 
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
 defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
 defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
 defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
@@ -4644,7 +4528,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                        (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
 defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw>, VEX;
 defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
@@ -4670,7 +4554,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
           (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   let isCommutable = 0 in
   defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
                                                          0>, VEX_4V;
@@ -4737,7 +4621,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
        OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
   defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
@@ -4769,7 +4653,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
         OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
   let isCommutable = 0 in {
   defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                       VR128, memopv16i8, i128mem, 0>, VEX_4V;
@@ -4810,7 +4694,7 @@ let Constraints = "$src1 = $dst" in {
 }
 
 /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                     RegisterClass RC, X86MemOperand x86memop,
                                     PatFrag mem_frag, Intrinsic IntId> {
@@ -4870,7 +4754,7 @@ defm PBLENDVB     : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
 def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
           (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -4904,7 +4788,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
           (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
   defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
                                      0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
@@ -4936,8 +4820,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
   defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
 }
 
-let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
-    Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
   def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
       "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
@@ -4972,7 +4855,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
   defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
     Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
   def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
       (ins VR128:$src1, VR128:$src3, i8imm:$src5),
@@ -5007,7 +4890,7 @@ let Defs = [ECX, EFLAGS] in {
   }
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPCMPISTRI  : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
                                     VEX;
 defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
@@ -5046,7 +4929,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
   }
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
 defm VPCMPESTRI  : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
                                     VEX;
 defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
@@ -5165,7 +5048,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
 }
 
 // Perform One Round of an AES Encryption/Decryption Flow
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
   defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
                          int_x86_aesni_aesenc, 0>, VEX_4V;
   defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
@@ -5205,7 +5088,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
           (AESDECLASTrm VR128:$src1, addr:$src2)>;
 
 // Perform the AES InvMixColumn Transformation
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
   def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1),
       "vaesimc\t{$src1, $dst|$dst, $src1}",
@@ -5233,7 +5116,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
   OpSize;
 
 // AES Round Key Generation Assist
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
   def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, i8imm:$src2),
       "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5269,7 +5152,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
 // Only the AVX version of CLMUL instructions are described here.
 
 // Carry-less Multiplication instructions
-let isAsmParserOnly = 1 in {
 def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, i8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5295,13 +5177,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
 defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
 defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
 
-} // isAsmParserOnly
-
 //===----------------------------------------------------------------------===//
 // AVX Instructions
 //===----------------------------------------------------------------------===//
 
-let isAsmParserOnly = 1 in {
 
 // Load from memory and broadcast to all elements of the destination operand
 class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -5435,8 +5314,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                    [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
 
-} // isAsmParserOnly
-
 def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
           (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
 def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
@@ -5622,11 +5499,15 @@ def : Pat<(X86Movddup (bc_v2f64
 // Shuffle with UNPCKLPS
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
           (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+          (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
           (UNPCKLPSrm VR128:$src1, addr:$src2)>;
 
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
           (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+          (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
           (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
 
@@ -5644,11 +5525,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
 // Shuffle with UNPCKLPD
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
           (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+          (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
           (UNPCKLPDrm VR128:$src1, addr:$src2)>;
 
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
           (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+          (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
           (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
 
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 6a24d14..f73cff3 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -34,9 +34,16 @@ let Uses = [EFLAGS] in
   def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
 def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
               [(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
 def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
               [(int_x86_int imm:$trap)]>;
 
+
 def SYSCALL  : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
 def SYSRETL  : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
 def SYSRETQ  :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
@@ -207,10 +214,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
 
 def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
 
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+               "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+               "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+                "str{q}\t{$dst}", []>, TB;
+def STRm   : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+               "str{w}\t{$dst}", []>, TB;
+
 def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
              "ltr{w}\t{$src}", []>, TB;
 def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
@@ -393,3 +405,23 @@ let Defs = [RDX, RAX], Uses = [RCX] in
 
 let Uses = [RDX, RAX, RCX] in
   def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+  def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+  def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+  def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+  def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+  def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+  def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+  def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+  def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+  def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 6686214..83bba52 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -15,7 +15,9 @@
 #include "X86TargetMachine.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ELF.h"
 using namespace llvm;
@@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
   DwarfUsesInlineInfoSection = true;
 
   // Exceptions handling
-  ExceptionsType = ExceptionHandling::DwarfTable;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+                                                   unsigned Encoding,
+                                                   MCStreamer &Streamer) const {
+  MCContext &Context = Streamer.getContext();
+  const MCExpr *Res =
+    MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+  const MCExpr *Four = MCConstantExpr::Create(4, Context);
+  return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+  : X86MCAsmInfoDarwin(Triple) {
 }
 
 X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -89,7 +106,9 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   SupportsDebugInformation = true;
 
   // Exceptions handling
-  ExceptionsType = ExceptionHandling::DwarfTable;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+
+  DwarfRequiresFrameSection = false;
 
   // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
   // .words.
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
index 5815225..2cd4c8e 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -25,6 +25,14 @@ namespace llvm {
     explicit X86MCAsmInfoDarwin(const Triple &Triple);
   };
 
+  struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+    explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+    virtual const MCExpr *
+    getExprForPersonalitySymbol(const MCSymbol *Sym,
+                                unsigned Encoding,
+                                MCStreamer &Streamer) const;
+  };
+
   struct X86ELFMCAsmInfo : public MCAsmInfo {
     explicit X86ELFMCAsmInfo(const Triple &Triple);
     virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 0e3b571..f195a67 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -382,7 +382,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                            const TargetInstrDesc &Desc,
                                            raw_ostream &OS) const {
   bool HasVEX_4V = false;
-  if ((TSFlags >> 32) & X86II::VEX_4V)
+  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
     HasVEX_4V = true;
 
   // VEX_R: opcode externsion equivalent to REX.R in
@@ -446,10 +446,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   if (TSFlags & X86II::OpSize)
     VEX_PP = 0x01;
 
-  if ((TSFlags >> 32) & X86II::VEX_W)
+  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
     VEX_W = 1;
 
-  if ((TSFlags >> 32) & X86II::VEX_L)
+  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
     VEX_L = 1;
 
   switch (TSFlags & X86II::Op0Mask) {
@@ -470,6 +470,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   case X86II::XD:  // F2 0F
     VEX_PP = 0x3;
     break;
+  case X86II::A6:  // Bypass: Not used by VEX
+  case X86II::A7:  // Bypass: Not used by VEX
   case X86II::TB:  // Bypass: Not used by VEX
   case 0:
     break;  // No prefix!
@@ -512,13 +514,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
     }
 
     // To only check operands before the memory address ones, start
-    // the search from the begining
+    // the search from the beginning
     if (IsDestMem)
       CurOp = 0;
 
     // If the last register should be encoded in the immediate field
     // do not use any bit from VEX prefix to this register, ignore it
-    if ((TSFlags >> 32) & X86II::VEX_I8IMM)
+    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
       NumOps--;
 
     for (; CurOp != NumOps; ++CurOp) {
@@ -742,6 +744,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   case X86II::TB:  // Two-byte opcode prefix
   case X86II::T8:  // 0F 38
   case X86II::TA:  // 0F 3A
+  case X86II::A6:  // 0F A6
+  case X86II::A7:  // 0F A7
     Need0FPrefix = true;
     break;
   case X86II::TF: // F2 0F 38
@@ -786,6 +790,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   case X86II::TA:    // 0F 3A
     EmitByte(0x3A, CurByte, OS);
     break;
+  case X86II::A6:    // 0F A6
+    EmitByte(0xA6, CurByte, OS);
+    break;
+  case X86II::A7:    // 0F A7
+    EmitByte(0xA7, CurByte, OS);
+    break;
   }
 }
 
@@ -819,9 +829,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   // It uses the VEX.VVVV field?
   bool HasVEX_4V = false;
 
-  if ((TSFlags >> 32) & X86II::VEX)
+  if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
     HasVEXPrefix = true;
-  if ((TSFlags >> 32) & X86II::VEX_4V)
+  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
     HasVEX_4V = true;
 
   
@@ -837,7 +847,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   
   unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
   
-  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+  if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
     BaseOpcode = 0x0F;   // Weird 3DNow! encoding.
   
   unsigned SrcRegNum = 0;
@@ -994,7 +1004,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if (CurOp != NumOps) {
     // The last source register of a 4 operand instruction in AVX is encoded
     // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
-    if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
+    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
       const MCOperand &MO = MI.getOperand(CurOp++);
       bool IsExtReg =
         X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
@@ -1017,7 +1027,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     }
   }
 
-  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+  if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
     EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
   
 
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 2f6bd88..37fb0fe 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -308,6 +308,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
   return 0;
 }
 
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+  const TargetRegisterClass *Super = RC;
+  TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+  do {
+    switch (Super->getID()) {
+    case X86::GR8RegClassID:
+    case X86::GR16RegClassID:
+    case X86::GR32RegClassID:
+    case X86::GR64RegClassID:
+    case X86::FR32RegClassID:
+    case X86::FR64RegClassID:
+    case X86::RFP32RegClassID:
+    case X86::RFP64RegClassID:
+    case X86::RFP80RegClassID:
+    case X86::VR128RegClassID:
+    case X86::VR256RegClassID:
+      // Don't return a super-class that would shrink the spill size.
+      // That can happen with the vector and float classes.
+      if (Super->getSize() == RC->getSize())
+        return Super;
+    }
+    Super = *I++;
+  } while (Super);
+  return RC;
+}
+
 const TargetRegisterClass *
 X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
   switch (Kind) {
@@ -337,7 +364,27 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
     else
       return &X86::GR32RegClass;
   }
-  return NULL;
+  return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+                                     MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+  switch (RC->getID()) {
+  default:
+    return 0;
+  case X86::GR32RegClassID:
+    return 4 - FPDiff;
+  case X86::GR64RegClassID:
+    return 12 - FPDiff;
+  case X86::VR128RegClassID:
+    return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+  case X86::VR64RegClassID:
+    return 4;
+  }
 }
 
 const unsigned *
@@ -450,7 +497,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   // FIXME: It's more complicated than this...
   if (0 && requiresRealignment && MFI->hasVarSizedObjects())
     report_fatal_error(
-      "Stack realignment in presense of dynamic allocas is not supported");
+      "Stack realignment in presence of dynamic allocas is not supported");
 
   // If we've requested that we force align the stack do so now.
   if (ForceStackAlign)
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 064be64..9970c52 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -91,6 +91,9 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;
 
+  const TargetRegisterClass*
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
   /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
   /// values.
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -101,6 +104,9 @@ public:
   const TargetRegisterClass *
   getCrossCopyRegClass(const TargetRegisterClass *RC) const;
 
+  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                               MachineFunction &MF) const;
+
   /// getCalleeSavedRegs - Return a null-terminated list of all of the
   /// callee-save registers on this target.
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 612fac2..fd7a247 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -46,7 +46,8 @@ let Namespace = "X86" in {
   def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
   def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
 
-  // X86-64 only
+  // X86-64 only, requires REX.
+  let CostPerUse = 1 in {
   def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
   def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
   def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
@@ -59,6 +60,7 @@ let Namespace = "X86" in {
   def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
   def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
   def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+  }
 
   // High registers. On x86-64, these cannot be used in any instruction
   // with a REX prefix.
@@ -82,8 +84,8 @@ let Namespace = "X86" in {
   }
   def IP : Register<"ip">, DwarfRegNum<[16]>;
 
-  // X86-64 only
-  let SubRegIndices = [sub_8bit] in {
+  // X86-64 only, requires REX.
+  let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
   def R8W  : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
   def R9W  : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
   def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -105,7 +107,8 @@ let Namespace = "X86" in {
   def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
   def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
 
-  // X86-64 only
+  // X86-64 only, requires REX
+  let CostPerUse = 1 in {
   def R8D  : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
   def R9D  : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
   def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
@@ -114,7 +117,7 @@ let Namespace = "X86" in {
   def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
   def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
   def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
-  }
+  }}
 
   // 64-bit registers, X86-64 only
   let SubRegIndices = [sub_32bit] in {
@@ -127,6 +130,8 @@ let Namespace = "X86" in {
   def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
   def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
 
+  // These also require REX.
+  let CostPerUse = 1 in {
   def R8  : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
   def R9  : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
   def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
@@ -136,7 +141,7 @@ let Namespace = "X86" in {
   def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
   def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
   def RIP : RegisterWithSubRegs<"rip", [EIP]>,  DwarfRegNum<[16, -2, -2]>;
-  }
+  }}
 
   // MMX Registers. These are actually aliased to ST0 .. ST7
   def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -170,6 +175,7 @@ let Namespace = "X86" in {
   def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
 
   // X86-64 only
+  let CostPerUse = 1 in {
   def XMM8:  Register<"xmm8">,  DwarfRegNum<[25, -2, -2]>;
   def XMM9:  Register<"xmm9">,  DwarfRegNum<[26, -2, -2]>;
   def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
@@ -178,7 +184,7 @@ let Namespace = "X86" in {
   def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
   def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
   def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
-  }
+  }}
 
   // YMM Registers, used by AVX instructions
   let SubRegIndices = [sub_xmm] in {
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 42e8193..02754f9 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                         bool isVolatile, bool AlwaysInline,
                                          MachinePointerInfo DstPtrInfo,
                                          MachinePointerInfo SrcPtrInfo) const {
-  // This requires the copy size to be a constant, preferrably
+  // This requires the copy size to be a constant, preferably
   // within a subtarget-specific limit.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
   if (!ConstantSize)
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 1ee7312..ba5864e 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
 /// passed as the second argument. Otherwise it returns null.
 const char *X86Subtarget::getBZeroEntry() const {
   // Darwin 10 has a __bzero entry point for this purpose.
-  if (getDarwinVers() >= 10)
+  if (getTargetTriple().isMacOSX() &&
+      !getTargetTriple().isMacOSXVersionLT(10, 6))
     return "__bzero";
 
   return 0;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0a62a02..286a798 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,9 +165,15 @@ public:
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
 
-  bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
-  bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
-  bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
+  const Triple &getTargetTriple() const { return TargetTriple; }
+
+  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+  bool isTargetFreeBSD() const {
+    return TargetTriple.getOS() == Triple::FreeBSD;
+  }
+  bool isTargetSolaris() const {
+    return TargetTriple.getOS() == Triple::Solaris;
+  }
 
   // ELF is a reasonably sane default and the only other X86 targets we
   // support are Darwin and Windows. Just use "not those".
@@ -215,13 +221,6 @@ public:
     return PICStyle == PICStyles::StubDynamicNoPIC ||
            PICStyle == PICStyles::StubPIC; }
 
-  /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
-  /// 10 = Snow Leopard, etc.
-  unsigned getDarwinVers() const {
-    if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
-    return 0;
-  }
-
   /// ClassifyGlobalReference - Classify a global variable reference for the
   /// current subtarget according to how we should reference it in a non-pcrel
   /// context.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 889c824..7483329 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -26,19 +26,18 @@ using namespace llvm;
 
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
-  switch (TheTriple.getOS()) {
-  case Triple::Darwin:
-    return new X86MCAsmInfoDarwin(TheTriple);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
-    if (TheTriple.getEnvironment() == Triple::MachO)
-      return new X86MCAsmInfoDarwin(TheTriple);
+
+  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+    if (TheTriple.getArch() == Triple::x86_64)
+      return new X86_64MCAsmInfoDarwin(TheTriple);
     else
-      return new X86MCAsmInfoCOFF(TheTriple);
-  default:
-    return new X86ELFMCAsmInfo(TheTriple);
+      return new X86MCAsmInfoDarwin(TheTriple);
   }
+
+  if (TheTriple.isOSWindows())
+    return new X86MCAsmInfoCOFF(TheTriple);
+
+  return new X86ELFMCAsmInfo(TheTriple);
 }
 
 static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
@@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     bool RelaxAll,
                                     bool NoExecStack) {
   Triple TheTriple(TT);
-  switch (TheTriple.getOS()) {
-  case Triple::Darwin:
+
+  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
     return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
-  case Triple::MinGW32:
-  case Triple::Cygwin:
-  case Triple::Win32:
-    if (TheTriple.getEnvironment() == Triple::MachO)
-      return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
-    else
-      return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
-  default:
-    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
-  }
+
+  if (TheTriple.isOSWindows())
+    return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+  return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
 }
 
 extern "C" void LLVMInitializeX86Target() {
@@ -96,11 +90,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
   : X86TargetMachine(T, TT, FS, false),
     DataLayout(getSubtargetImpl()->isTargetDarwin() ?
-               "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+               "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
                (getSubtargetImpl()->isTargetCygMing() ||
                 getSubtargetImpl()->isTargetWindows()) ?
-               "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
-               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+               "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
     InstrInfo(*this),
     TSInfo(*this),
     TLInfo(*this),
@@ -111,7 +105,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
 X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
   : X86TargetMachine(T, TT, FS, true),
-    DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+    DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
     InstrInfo(*this),
     TSInfo(*this),
     TLInfo(*this),
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c15dfbb..1231798 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
     getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                        MachineModuleInfo *MMI) const {
+  return Mang->getSymbol(GV);
+}
+
 unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
   if (TM.getRelocationModel() == Reloc::PIC_)
     return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
@@ -52,7 +58,7 @@ unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
     return DW_EH_PE_absptr;
 }
 
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const {
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
   if (TM.getRelocationModel() == Reloc::PIC_)
     return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
   else
@@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
   return DW_EH_PE_absptr;
 }
 
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
-  CodeModel::Model Model = TM.getCodeModel();
-  if (TM.getRelocationModel() == Reloc::PIC_)
-    return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
-                             Model == CodeModel::Medium ?
-                             DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+  if (CFI)
+    return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
 
-  if (Model == CodeModel::Small || Model == CodeModel::Medium)
-    return DW_EH_PE_udata4;
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
 
-  return DW_EH_PE_absptr;
+  return DW_EH_PE_udata4;
 }
 
 unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f2fd49c..e21b5bf 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -25,6 +25,12 @@ namespace llvm {
     getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                    MachineModuleInfo *MMI, unsigned Encoding,
                                    MCStreamer &Streamer) const;
+
+    // getCFIPersonalitySymbol - The symbol that gets passed to
+    // .cfi_personality.
+    virtual MCSymbol *
+    getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+                            MachineModuleInfo *MMI) const;
   };
 
   class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -34,7 +40,7 @@ namespace llvm {
       :TM(tm) { }
     virtual unsigned getPersonalityEncoding() const;
     virtual unsigned getLSDAEncoding() const;
-    virtual unsigned getFDEEncoding() const;
+    virtual unsigned getFDEEncoding(bool CFI) const;
     virtual unsigned getTTypeEncoding() const;
   };
 
@@ -45,7 +51,7 @@ namespace llvm {
       :TM(tm) { }
     virtual unsigned getPersonalityEncoding() const;
     virtual unsigned getLSDAEncoding() const;
-    virtual unsigned getFDEEncoding() const;
+    virtual unsigned getFDEEncoding(bool CFI) const;
     virtual unsigned getTTypeEncoding() const;
   };
 
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index fc8a07a..6bec9f9 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -30,8 +30,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <set>
 using namespace llvm;
 
 /// XCoreDAGToDAGISel - XCore specific code to select XCore machine
@@ -49,7 +47,8 @@ namespace {
         Subtarget(*TM.getSubtargetImpl()) { }
 
     SDNode *Select(SDNode *N);
-    
+    SDNode *SelectBRIND(SDNode *N);
+
     /// getI32Imm - Return a target constant with the specified value, of type
     /// i32.
     inline SDValue getI32Imm(unsigned Imm) {
@@ -154,62 +153,133 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
 
 SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
-  EVT NVT = N->getValueType(0);
-  if (NVT == MVT::i32) {
-    switch (N->getOpcode()) {
-      default: break;
-      case ISD::Constant: {
-        uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
-        if (immMskBitp(N)) {
-          // Transformation function: get the size of a mask
-          // Look for the first non-zero bit
-          SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
-          return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
-                                        MVT::i32, MskSize);
-        }
-        else if (!isUInt<16>(Val)) {
-          SDValue CPIdx =
-            CurDAG->getTargetConstantPool(ConstantInt::get(
-                                  Type::getInt32Ty(*CurDAG->getContext()), Val),
-                                          TLI.getPointerTy());
-          return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, 
-                                        MVT::Other, CPIdx, 
-                                        CurDAG->getEntryNode());
-        }
-        break;
-      }
-      case XCoreISD::LADD: {
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                            N->getOperand(2) };
-        return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
-                                      Ops, 3);
-      }
-      case XCoreISD::LSUB: {
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                            N->getOperand(2) };
-        return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
-                                      Ops, 3);
-      }
-      case XCoreISD::MACCU: {
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                          N->getOperand(2), N->getOperand(3) };
-        return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
-                                      Ops, 4);
-      }
-      case XCoreISD::MACCS: {
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                          N->getOperand(2), N->getOperand(3) };
-        return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
-                                      Ops, 4);
-      }
-      case XCoreISD::LMUL: {
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                          N->getOperand(2), N->getOperand(3) };
-        return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
-                                      Ops, 4);
-      }
-      // Other cases are autogenerated.
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+    if (immMskBitp(N)) {
+      // Transformation function: get the size of a mask
+      // Look for the first non-zero bit
+      SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+      return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+                                    MVT::i32, MskSize);
+    }
+    else if (!isUInt<16>(Val)) {
+      SDValue CPIdx =
+        CurDAG->getTargetConstantPool(ConstantInt::get(
+                              Type::getInt32Ty(*CurDAG->getContext()), Val),
+                                      TLI.getPointerTy());
+      return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, 
+                                    MVT::Other, CPIdx, 
+                                    CurDAG->getEntryNode());
     }
+    break;
+  }
+  case XCoreISD::LADD: {
+    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+                        N->getOperand(2) };
+    return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+                                  Ops, 3);
+  }
+  case XCoreISD::LSUB: {
+    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+                        N->getOperand(2) };
+    return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+                                  Ops, 3);
+  }
+  case XCoreISD::MACCU: {
+    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+                      N->getOperand(2), N->getOperand(3) };
+    return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+                                  Ops, 4);
+  }
+  case XCoreISD::MACCS: {
+    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+                      N->getOperand(2), N->getOperand(3) };
+    return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+                                  Ops, 4);
+  }
+  case XCoreISD::LMUL: {
+    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+                      N->getOperand(2), N->getOperand(3) };
+    return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+                                  Ops, 4);
+  }
+  case ISD::BRIND:
+    if (SDNode *ResNode = SelectBRIND(N))
+      return ResNode;
+    break;
+  // Other cases are autogenerated.
   }
   return SelectCode(N);
 }
+
+/// Given a chain return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one instruction between Old and Chain and
+/// this instruction must be a TokenFactor. Returns an empty SDValue if 
+/// these conditions don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+  if (Chain == Old)
+    return New;
+  if (Chain->getOpcode() != ISD::TokenFactor)
+    return SDValue();
+  SmallVector<SDValue, 8> Ops;
+  bool found = false;
+  for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+    if (Chain->getOperand(i) == Old) {
+      Ops.push_back(New);
+      found = true;
+    } else {
+      Ops.push_back(Chain->getOperand(i));
+    }
+  }
+  if (!found)
+    return SDValue();
+  return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+                         &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // (brind (int_xcore_checkevent (addr)))
+  SDValue Chain = N->getOperand(0);
+  SDValue Addr = N->getOperand(1);
+  if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return 0;
+  unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+  if (IntNo != Intrinsic::xcore_checkevent)
+    return 0;
+  SDValue nextAddr = Addr->getOperand(2);
+  SDValue CheckEventChainOut(Addr.getNode(), 1);
+  if (!CheckEventChainOut.use_empty()) {
+    // If the chain out of the checkevent intrinsic is an operand of the
+    // indirect branch or used in a TokenFactor which is the operand of the
+    // indirect branch then build a new chain which uses the chain coming into
+    // the checkevent intrinsic instead.
+    SDValue CheckEventChainIn = Addr->getOperand(0);
+    SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+                                      CheckEventChainIn);
+    if (!NewChain.getNode())
+      return 0;
+    Chain = NewChain;
+  }
+  // Enable events on the thread using setsr 1 and then disable them immediately
+  // after with clrsr 1. If any resources owned by the thread are ready an event
+  // will be taken. If no resource is ready we branch to the address which was
+  // the operand to the checkevent intrinsic.
+  SDValue constOne = getI32Imm(1);
+  SDValue Glue =
+    SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+                                   constOne, Chain), 0);
+  Glue =
+    SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+                                   constOne, Glue), 0);
+  if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+      nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+    return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+                                nextAddr->getOperand(0), Glue);
+  }
+  return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 4817787..5987e8b 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -37,8 +37,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/VectorExtras.h"
-#include <queue>
-#include <set>
 using namespace llvm;
 
 const char *XCoreTargetLowering::
@@ -967,7 +965,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
   // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag in necessary since all emited instructions must be
+  // The InFlag in necessary since all emitted instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index ecdd4cb..789546e 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -308,6 +308,16 @@ multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
                  !strconcat(OpcStr, " $b"),
                  [(OpNode immU16:$b)]>;
 }
+multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
+  def _u6: _FU6<
+                 (outs), (ins i32imm:$b),
+                 !strconcat(OpcStr, " $b"),
+                 [(Int immU6:$b)]>;
+  def _lu6: _FLU6<
+                 (outs), (ins i32imm:$b),
+                 !strconcat(OpcStr, " $b"),
+                 [(Int immU16:$b)]>;
+}
 
 multiclass FU6_LU6_np<string OpcStr> {
   def _u6: _FU6<
@@ -638,8 +648,8 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
 }
 }
 
-// TODO extdp, kentsp, krestsp, blat, setsr
-// clrsr, getsr, kalli
+// TODO extdp, kentsp, krestsp, blat
+// getsr, kalli
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
 def BRBU_u6 : _FU6<
                  (outs),
@@ -678,6 +688,17 @@ def LDAWCP_lu6: _FLRU6<
                     "ldaw r11, cp[$a]",
                     [(set R11, ADDRcpii:$a)]>;
 
+defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
+defm SETSR_branch : FU6_LU6_np<"setsr">;
+defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+}
+
 // U10
 // TODO ldwcpl, blacp
 
@@ -718,7 +739,7 @@ def BL_lu10 : _FLU10<
 }
 
 // Two operand short
-// TODO getr, getst
+// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
 def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
                  "not $dst, $b",
                  [(set GRRegs:$dst, (not GRRegs:$b))]>;
@@ -727,8 +748,6 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
                  "neg $dst, $b",
                  [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
 
-// TODO setd, eet, eef, testwct, tinitpc, tinitdp,
-// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
 let Constraints = "$src1 = $dst" in {
 let neverHasSideEffects = 1 in
 def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
@@ -816,9 +835,29 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                  "setd res[$r], $val",
                  [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
 
+def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+                    "getst $dst, res[$r]",
+                    [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+                     "init t[$t]:sp, $src",
+                     [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+                     "init t[$t]:pc, $src",
+                     [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+                     "init t[$t]:cp, $src",
+                     [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+                     "init t[$t]:dp, $src",
+                     [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
 // Two operand long
-// TODO setclk, setrdy, setpsc, endin, peek,
-// getd, testlcl, tinitlr, getps, setps
+// TODO endin, peek,
+// getd, testlcl
 def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
                  "bitrev $dst, $src",
                  [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
@@ -839,10 +878,41 @@ def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
                   "settw res[$r], $val",
                   [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
 
+def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+                 "get $dst, ps[$src]",
+                 [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+                 "set ps[$src1], $src2",
+                 [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+                       "init t[$t]:lr, $src",
+                       [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+                       "setclk res[$src1], $src2",
+                       [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+                       "setrdy res[$src1], $src2",
+                       [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+                       "setpsc res[$src1], $src2",
+                       [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
 // One operand short
-// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp
+// TODO edu, eeu, waitet, waitef, tstart, clrtp
 // setdp, setcp, setev, kcall
 // dgetreg
+def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
+                    "msync res[$i]",
+		    [(int_xcore_msync GRRegs:$i)]>;
+def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
+                    "mjoin res[$i]",
+		    [(int_xcore_mjoin GRRegs:$i)]>;
+
 let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
 def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
                  "bau $addr",
@@ -899,7 +969,7 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
                [(int_xcore_eeu GRRegs:$r)]>;
 
 // Zero operand short
-// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
 // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
 // dentsp, drestsp
 
@@ -910,6 +980,10 @@ def GETID_0R : _F0R<(outs), (ins),
                  "get r11, id",
                  [(set R11, (int_xcore_getid))]>;
 
+def SSYNC_0r : _F0R<(outs), (ins),
+                    "ssync",
+		    [(int_xcore_ssync)]>;
+
 let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
     hasSideEffects = 1 in
 def WAITEU_0R : _F0R<(outs), (ins),
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 56c0879..0287a51 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -104,6 +104,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
   return TFI->hasFP(MF);
 }
 
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  return false;
+}
+
 // This function eliminates ADJCALLSTACKDOWN,
 // ADJCALLSTACKUP pseudo instructions
 void XCoreRegisterInfo::
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 2185755..770483b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -48,6 +48,8 @@ public:
   
   bool requiresRegisterScavenging(const MachineFunction &MF) const;
 
+  bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0c650cf..54a7f67 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -771,8 +771,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // function empty.
   NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
 
-  // Loop over the argument list, transfering uses of the old arguments over to
-  // the new arguments, also transfering over the names as well.
+  // Loop over the argument list, transferring uses of the old arguments over to
+  // the new arguments, also transferring over the names as well.
   //
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
        I2 = NF->arg_begin(); I != E; ++I) {
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index efdeec5..179b150 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,5 +20,4 @@ add_llvm_library(LLVMipo
   PruneEH.cpp
   StripDeadPrototypes.cpp
   StripSymbols.cpp
-  StructRetPromotion.cpp
   )
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index b423221..d4eaf0c 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -49,7 +49,7 @@ namespace {
 
     /// Struct that represents (part of) either a return value or a function
     /// argument.  Used so that arguments and return values can be used
-    /// interchangably.
+    /// interchangeably.
     struct RetOrArg {
       RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
                IsArg(IsArg) {}
@@ -273,8 +273,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   // function empty.
   NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
 
-  // Loop over the argument list, transfering uses of the old arguments over to
-  // the new arguments, also transfering over the names as well.  While we're at
+  // Loop over the argument list, transferring uses of the old arguments over to
+  // the new arguments, also transferring over the names as well.  While we're at
   // it, remove the dead arguments from the DeadArguments list.
   //
   for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
@@ -294,7 +294,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
 /// instead.
 bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
 {
-  if (Fn.isDeclaration())
+  if (Fn.isDeclaration() || Fn.mayBeOverridden())
     return false;
 
   // Functions with local linkage should already have been handled.
@@ -379,7 +379,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
       // The value is returned from a function. It's only live when the
       // function's return value is live. We use RetValNum here, for the case
       // that U is really a use of an insertvalue instruction that uses the
-      // orginal Use.
+      // original Use.
       RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
       // We might be live, depending on the liveness of Use.
       return MarkIfNotLive(Use, MaybeLiveUses);
@@ -894,8 +894,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // function empty.
   NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
 
-  // Loop over the argument list, transfering uses of the old arguments over to
-  // the new arguments, also transfering over the names as well.
+  // Loop over the argument list, transferring uses of the old arguments over to
+  // the new arguments, also transferring over the names as well.
   i = 0;
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
        I2 = NF->arg_begin(); I != E; ++I, ++i)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index d4cb712..ded58ac 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
+#include "llvm/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -54,6 +55,7 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
 STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
+STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
 
 namespace {
   struct GlobalStatus;
@@ -77,6 +79,7 @@ namespace {
     bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
                                const SmallPtrSet<const PHINode*, 16> &PHIUsers,
                                const GlobalStatus &GS);
+    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
   };
 }
 
@@ -1191,9 +1194,11 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
     const StructType *ST =
       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
 
-    Result =
+    PHINode *NewPN =
      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+                     PN->getNumIncomingValues(),
                      PN->getName()+".f"+Twine(FieldNo), PN);
+    Result = NewPN;
     PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
   } else {
     llvm_unreachable("Unknown usable value");
@@ -1940,36 +1945,24 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
   return Changed;
 }
 
-/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all
+/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
 /// initializers have an init priority of 65535.
 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (GV == 0) return 0;
   
-  // Found it, verify it's an array of { int, void()* }.
-  const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType());
-  if (!ATy) return 0;
-  const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
-  if (!STy || STy->getNumElements() != 2 ||
-      !STy->getElementType(0)->isIntegerTy(32)) return 0;
-  const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
-  if (!PFTy) return 0;
-  const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
-  if (!FTy || !FTy->getReturnType()->isVoidTy() ||
-      FTy->isVarArg() || FTy->getNumParams() != 0)
-    return 0;
-
   // Verify that the initializer is simple enough for us to handle. We are
   // only allowed to optimize the initializer if it is unique.
   if (!GV->hasUniqueInitializer()) return 0;
-  
-  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (!CA) return 0;
-  
+
+  if (isa<ConstantAggregateZero>(GV->getInitializer()))
+    return GV;
+  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
-    ConstantStruct *CS = dyn_cast<ConstantStruct>(*i);
-    if (CS == 0) return 0;
-    
+    if (isa<ConstantAggregateZero>(*i))
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(*i);
     if (isa<ConstantPointerNull>(CS->getOperand(1)))
       continue;
 
@@ -1978,8 +1971,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
       return 0;
 
     // Init priority must be standard.
-    ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
-    if (!CI || CI->getZExtValue() != 65535)
+    ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+    if (CI->getZExtValue() != 65535)
       return 0;
   }
 
@@ -1989,6 +1982,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
 /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
 /// return a list of the functions and null terminator as a vector.
 static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
+  if (GV->getInitializer()->isNullValue())
+    return std::vector<Function*>();
   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   std::vector<Function*> Result;
   Result.reserve(CA->getNumOperands());
@@ -2019,7 +2014,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
       const PointerType *PFTy = PointerType::getUnqual(FTy);
       CSVals[1] = Constant::getNullValue(PFTy);
       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
-                                   2147483647);
+                                   0x7fffffff);
     }
     CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
   }
@@ -2696,12 +2691,126 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
   return Changed;
 }
 
+static Function *FindCXAAtExit(Module &M) {
+  Function *Fn = M.getFunction("__cxa_atexit");
+  
+  if (!Fn)
+    return 0;
+  
+  const FunctionType *FTy = Fn->getFunctionType();
+  
+  // Checking that the function has the right return type, the right number of 
+  // parameters and that they all have pointer types should be enough.
+  if (!FTy->getReturnType()->isIntegerTy() ||
+      FTy->getNumParams() != 3 ||
+      !FTy->getParamType(0)->isPointerTy() ||
+      !FTy->getParamType(1)->isPointerTy() ||
+      !FTy->getParamType(2)->isPointerTy())
+    return 0;
+
+  return Fn;
+}
+
+/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
+/// destructor and can therefore be eliminated.
+/// Note that we assume that other optimization passes have already simplified
+/// the code so we only look for a function with a single basic block, where
+/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor.
+static bool cxxDtorIsEmpty(const Function &Fn,
+                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
+  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
+  // nounwind, but that doesn't seem worth doing.
+  if (Fn.isDeclaration())
+    return false;
+
+  if (++Fn.begin() != Fn.end())
+    return false;
+
+  const BasicBlock &EntryBlock = Fn.getEntryBlock();
+  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
+       I != E; ++I) {
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      // Ignore debug intrinsics.
+      if (isa<DbgInfoIntrinsic>(CI))
+        continue;
+
+      const Function *CalledFn = CI->getCalledFunction();
+
+      if (!CalledFn)
+        return false;
+
+      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
+
+      // Don't treat recursive functions as empty.
+      if (!NewCalledFunctions.insert(CalledFn))
+        return false;
+
+      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
+        return false;
+    } else if (isa<ReturnInst>(*I))
+      return true;
+    else
+      return false;
+  }
+
+  return false;
+}
+
+bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+  /// Itanium C++ ABI p3.3.5:
+  ///
+  ///   After constructing a global (or local static) object, that will require
+  ///   destruction on exit, a termination function is registered as follows:
+  ///
+  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
+  ///
+  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
+  ///   call f(p) when DSO d is unloaded, before all such termination calls
+  ///   registered before this one. It returns zero if registration is
+  ///   successful, nonzero on failure.
+
+  // This pass will look for calls to __cxa_atexit where the function is trivial
+  // and remove them.
+  bool Changed = false;
+
+  for (Function::use_iterator I = CXAAtExitFn->use_begin(), 
+       E = CXAAtExitFn->use_end(); I != E;) {
+    // We're only interested in calls. Theoretically, we could handle invoke
+    // instructions as well, but neither llvm-gcc nor clang generate invokes
+    // to __cxa_atexit.
+    CallInst *CI = dyn_cast<CallInst>(*I++);
+    if (!CI)
+      continue;
+
+    Function *DtorFn = 
+      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
+    if (!DtorFn)
+      continue;
+
+    SmallPtrSet<const Function *, 8> CalledFunctions;
+    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+      continue;
+
+    // Just remove the call.
+    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+    CI->eraseFromParent();
+
+    ++NumCXXDtorsRemoved;
+
+    Changed |= true;
+  }
+
+  return Changed;
+}
+
 bool GlobalOpt::runOnModule(Module &M) {
   bool Changed = false;
 
   // Try to find the llvm.globalctors list.
   GlobalVariable *GlobalCtors = FindGlobalCtors(M);
 
+  Function *CXAAtExitFn = FindCXAAtExit(M);
+
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
@@ -2718,6 +2827,11 @@ bool GlobalOpt::runOnModule(Module &M) {
 
     // Resolve aliases, when possible.
     LocalChange |= OptimizeGlobalAliases(M);
+
+    // Try to remove trivial global destructors.
+    if (CXAAtExitFn)
+      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+
     Changed |= LocalChange;
   }
 
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index c7c2939..25c0134 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -186,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
         // Find the returned value
         Value *V;
         if (!STy)
-          V = RI->getOperand(i);
+          V = RI->getOperand(0);
         else
           V = FindInsertedValue(RI->getOperand(0), i);
 
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index fbe90ce..21dcb51 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -45,7 +45,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeStripDebugDeclarePass(Registry);
   initializeStripDeadDebugInfoPass(Registry);
   initializeStripNonDebugSymbolsPass(Registry);
-  initializeSRETPromotionPass(Registry);
 }
 
 void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 37eafd7..57f3e77 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -29,7 +29,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include <set>
 using namespace llvm;
 
 STATISTIC(NumInlined, "Number of functions inlined");
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 9b9ebad..7cb1d18 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -126,6 +126,8 @@ bool InternalizePass::runOnModule(Module &M) {
   // FIXME: maybe use private linkage?
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     if (!I->isDeclaration() &&         // Function must be defined here
+        // Available externally is really just a "declaration with a body".
+        !I->hasAvailableExternallyLinkage() &&
         !I->hasLocalLinkage() &&  // Can't already have internal linkage
         !ExternalNames.count(I->getName())) {// Not marked to keep external?
       I->setLinkage(GlobalValue::InternalLinkage);
@@ -144,9 +146,6 @@ bool InternalizePass::runOnModule(Module &M) {
 
   // Never internalize anchors used by the machine module info, else the info
   // won't find them.  (see MachineModuleInfo.)
-  ExternalNames.insert("llvm.dbg.compile_units");
-  ExternalNames.insert("llvm.dbg.global_variables");
-  ExternalNames.insert("llvm.dbg.subprograms");
   ExternalNames.insert("llvm.global_ctors");
   ExternalNames.insert("llvm.global_dtors");
   ExternalNames.insert("llvm.noinline");
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index b545f0b..52ecf17 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -430,7 +430,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
 
   // This PHI node will be in the new block created from the
   // splitBasicBlock call.
-  PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()),
+  PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
                                  "SetJmpReturn", Inst);
 
   // Coming from a call to setjmp, the return is 0.
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index cccffca..f741443 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -55,6 +55,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
+#include "llvm/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
@@ -125,7 +126,7 @@ private:
 const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
 const ComparableFunction ComparableFunction::TombstoneKey =
     ComparableFunction(1);
-TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1);
+TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1);
 
 }
 
@@ -212,7 +213,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     return false;
   }
 
-  switch(Ty1->getTypeID()) {
+  switch (Ty1->getTypeID()) {
   default:
     llvm_unreachable("Unknown type!");
     // Fall through in Release mode.
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 2afd029..d9d1d10 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -95,7 +95,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
     PHINode* OldPhi = dyn_cast<PHINode>(I);
     if (!OldPhi) break;
     
-    PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
+    PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
     OldPhi->replaceAllUsesWith(retPhi);
     Ins = newReturnBlock->getFirstNonPHI();
     
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index d91c2c4..9470180 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -27,7 +27,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CFG.h"
-#include <set>
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
deleted file mode 100644
index 584deac..0000000
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass finds functions that return a struct (using a pointer to the struct
-// as the first argument of the function, marked with the 'sret' attribute) and
-// replaces them with a new function that simply returns each of the elements of
-// that struct (using multiple return values).
-//
-// This pass works under a number of conditions:
-//  1. The returned struct must not contain other structs
-//  2. The returned struct must only be used to load values from
-//  3. The placeholder struct passed in is the result of an alloca
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sretpromotion"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
-STATISTIC(NumSRET , "Number of sret promoted");
-namespace {
-  /// SRETPromotion - This pass removes sret parameter and updates
-  /// function to use multiple return value.
-  ///
-  struct SRETPromotion : public CallGraphSCCPass {
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      CallGraphSCCPass::getAnalysisUsage(AU);
-    }
-
-    virtual bool runOnSCC(CallGraphSCC &SCC);
-    static char ID; // Pass identification, replacement for typeid
-    SRETPromotion() : CallGraphSCCPass(ID) {
-      initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-    CallGraphNode *PromoteReturn(CallGraphNode *CGN);
-    bool isSafeToUpdateAllCallers(Function *F);
-    Function *cloneFunctionBody(Function *F, const StructType *STy);
-    CallGraphNode *updateCallSites(Function *F, Function *NF);
-  };
-}
-
-char SRETPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
-                "Promote sret arguments to multiple ret values", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
-INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
-                "Promote sret arguments to multiple ret values", false, false)
-
-Pass *llvm::createStructRetPromotionPass() {
-  return new SRETPromotion();
-}
-
-bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) {
-  bool Changed = false;
-
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
-    if (CallGraphNode *NewNode = PromoteReturn(*I)) {
-      SCC.ReplaceNode(*I, NewNode);
-      Changed = true;
-    }
-
-  return Changed;
-}
-
-/// PromoteReturn - This method promotes function that uses StructRet paramater 
-/// into a function that uses multiple return values.
-CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
-  Function *F = CGN->getFunction();
-
-  if (!F || F->isDeclaration() || !F->hasLocalLinkage())
-    return 0;
-
-  // Make sure that function returns struct.
-  if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
-    return 0;
-
-  DEBUG(dbgs() << "SretPromotion: Looking at sret function " 
-        << F->getName() << "\n");
-
-  assert(F->getReturnType()->isVoidTy() && "Invalid function return type");
-  Function::arg_iterator AI = F->arg_begin();
-  const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
-  assert(FArgType && "Invalid sret parameter type");
-  const llvm::StructType *STy = 
-    dyn_cast<StructType>(FArgType->getElementType());
-  assert(STy && "Invalid sret parameter element type");
-
-  // Check if it is ok to perform this promotion.
-  if (isSafeToUpdateAllCallers(F) == false) {
-    DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
-    ++NumRejectedSRETUses;
-    return 0;
-  }
-
-  DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
-  ++NumSRET;
-  // [1] Replace use of sret parameter 
-  AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", 
-                                         F->getEntryBlock().begin());
-  Value *NFirstArg = F->arg_begin();
-  NFirstArg->replaceAllUsesWith(TheAlloca);
-
-  // [2] Find and replace ret instructions
-  for (Function::iterator FI = F->begin(), FE = F->end();  FI != FE; ++FI) 
-    for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
-      Instruction *I = BI;
-      ++BI;
-      if (isa<ReturnInst>(I)) {
-        Value *NV = new LoadInst(TheAlloca, "mrv.ld", I);
-        ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);
-        I->replaceAllUsesWith(NR);
-        I->eraseFromParent();
-      }
-    }
-
-  // [3] Create the new function body and insert it into the module.
-  Function *NF = cloneFunctionBody(F, STy);
-
-  // [4] Update all call sites to use new function
-  CallGraphNode *NF_CFN = updateCallSites(F, NF);
-
-  CallGraph &CG = getAnalysis<CallGraph>();
-  NF_CFN->stealCalledFunctionsFrom(CG[F]);
-
-  delete CG.removeFunctionFromModule(F);
-  return NF_CFN;
-}
-
-// Check if it is ok to perform this promotion.
-bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
-
-  if (F->use_empty())
-    // No users. OK to modify signature.
-    return true;
-
-  for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end();
-       FnUseI != FnUseE; ++FnUseI) {
-    // The function is passed in as an argument to (possibly) another function,
-    // we can't change it!
-    CallSite CS(*FnUseI);
-    Instruction *Call = CS.getInstruction();
-    // The function is used by something else than a call or invoke instruction,
-    // we can't change it!
-    if (!Call || !CS.isCallee(FnUseI))
-      return false;
-    CallSite::arg_iterator AI = CS.arg_begin();
-    Value *FirstArg = *AI;
-
-    if (!isa<AllocaInst>(FirstArg))
-      return false;
-
-    // Check FirstArg's users.
-    for (Value::use_iterator ArgI = FirstArg->use_begin(), 
-           ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) {
-      User *U = *ArgI;
-      // If FirstArg user is a CallInst that does not correspond to current
-      // call site then this function F is not suitable for sret promotion.
-      if (CallInst *CI = dyn_cast<CallInst>(U)) {
-        if (CI != Call)
-          return false;
-      }
-      // If FirstArg user is a GEP whose all users are not LoadInst then
-      // this function F is not suitable for sret promotion.
-      else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-        // TODO : Use dom info and insert PHINodes to collect get results
-        // from multiple call sites for this GEP.
-        if (GEP->getParent() != Call->getParent())
-          return false;
-        for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
-             GEPI != GEPE; ++GEPI) 
-          if (!isa<LoadInst>(*GEPI))
-            return false;
-      } 
-      // Any other FirstArg users make this function unsuitable for sret 
-      // promotion.
-      else
-        return false;
-    }
-  }
-
-  return true;
-}
-
-/// cloneFunctionBody - Create a new function based on F and
-/// insert it into module. Remove first argument. Use STy as
-/// the return type for new function.
-Function *SRETPromotion::cloneFunctionBody(Function *F, 
-                                           const StructType *STy) {
-
-  const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
-
-  // Attributes - Keep track of the parameter attributes for the arguments.
-  SmallVector<AttributeWithIndex, 8> AttributesVec;
-  const AttrListPtr &PAL = F->getAttributes();
-
-  // Add any return attributes.
-  if (Attributes attrs = PAL.getRetAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
-
-  // Skip first argument.
-  Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
-  ++I;
-  // 0th parameter attribute is reserved for return type.
-  // 1th parameter attribute is for first 1st sret argument.
-  unsigned ParamIndex = 2; 
-  while (I != E) {
-    Params.push_back(I->getType());
-    if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
-      AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
-    ++I;
-    ++ParamIndex;
-  }
-
-  // Add any fn attributes.
-  if (Attributes attrs = PAL.getFnAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
-
-
-  FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg());
-  Function *NF = Function::Create(NFTy, F->getLinkage());
-  NF->takeName(F);
-  NF->copyAttributesFrom(F);
-  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()));
-  F->getParent()->getFunctionList().insert(F, NF);
-  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
-
-  // Replace arguments
-  I = F->arg_begin();
-  E = F->arg_end();
-  Function::arg_iterator NI = NF->arg_begin();
-  ++I;
-  while (I != E) {
-    I->replaceAllUsesWith(NI);
-    NI->takeName(I);
-    ++I;
-    ++NI;
-  }
-
-  return NF;
-}
-
-/// updateCallSites - Update all sites that call F to use NF.
-CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
-  CallGraph &CG = getAnalysis<CallGraph>();
-  SmallVector<Value*, 16> Args;
-
-  // Attributes - Keep track of the parameter attributes for the arguments.
-  SmallVector<AttributeWithIndex, 8> ArgAttrsVec;
-
-  // Get a new callgraph node for NF.
-  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
-
-  while (!F->use_empty()) {
-    CallSite CS(*F->use_begin());
-    Instruction *Call = CS.getInstruction();
-
-    const AttrListPtr &PAL = F->getAttributes();
-    // Add any return attributes.
-    if (Attributes attrs = PAL.getRetAttributes())
-      ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs));
-
-    // Copy arguments, however skip first one.
-    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-    Value *FirstCArg = *AI;
-    ++AI;
-    // 0th parameter attribute is reserved for return type.
-    // 1th parameter attribute is for first 1st sret argument.
-    unsigned ParamIndex = 2; 
-    while (AI != AE) {
-      Args.push_back(*AI); 
-      if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
-        ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
-      ++ParamIndex;
-      ++AI;
-    }
-
-    // Add any function attributes.
-    if (Attributes attrs = PAL.getFnAttributes())
-      ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs));
-    
-    AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end());
-    
-    // Build new call instruction.
-    Instruction *New;
-    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
-      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
-      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<InvokeInst>(New)->setAttributes(NewPAL);
-    } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
-      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<CallInst>(New)->setAttributes(NewPAL);
-      if (cast<CallInst>(Call)->isTailCall())
-        cast<CallInst>(New)->setTailCall();
-    }
-    Args.clear();
-    ArgAttrsVec.clear();
-    New->takeName(Call);
-
-    // Update the callgraph to know that the callsite has been transformed.
-    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
-    CalleeNode->removeCallEdgeFor(Call);
-    CalleeNode->addCalledFunction(New, NF_CGN);
-    
-    // Update all users of sret parameter to extract value using extractvalue.
-    for (Value::use_iterator UI = FirstCArg->use_begin(), 
-           UE = FirstCArg->use_end(); UI != UE; ) {
-      User *U2 = *UI++;
-      CallInst *C2 = dyn_cast<CallInst>(U2);
-      if (C2 && (C2 == Call))
-        continue;
-      
-      GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
-      ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
-      Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
-                                           "evi", UGEP);
-      while(!UGEP->use_empty()) {
-        // isSafeToUpdateAllCallers has checked that all GEP uses are
-        // LoadInsts
-        LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
-        L->replaceAllUsesWith(GR);
-        L->eraseFromParent();
-      }
-      UGEP->eraseFromParent();
-      continue;
-    }
-    Call->eraseFromParent();
-  }
-  
-  return NF_CGN;
-}
-
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 9c2969c..9c70cf8 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,7 @@
 #define INSTCOMBINE_INSTCOMBINE_H
 
 #include "InstCombineWorklist.h"
+#include "llvm/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/IRBuilder.h"
@@ -69,7 +70,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
                              : public FunctionPass,
                                public InstVisitor<InstCombiner, Instruction*> {
   TargetData *TD;
-  bool MustPreserveLCSSA;
   bool MadeIRChange;
 public:
   /// Worklist - All of the instructions that need to be simplified.
@@ -217,8 +217,8 @@ private:
   Instruction *transformCallThroughTrampoline(CallSite CS);
   Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
                                  bool DoXform = true);
+  Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
   bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
-  DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
   Value *EmitGEPOffset(User *GEP);
 
 public:
@@ -247,7 +247,10 @@ public:
     // segment of unreachable code, so just clobber the instruction.
     if (&I == V) 
       V = UndefValue::get(I.getType());
-      
+
+    DEBUG(errs() << "IC: Replacing " << I << "\n"
+                    "    with " << *V << '\n');
+
     I.replaceAllUsesWith(V);
     return &I;
   }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7986d1a..a08446e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@
 #include "InstCombine.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -330,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 
 
 /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi).  In pratice, we emit the more efficient
+/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
 /// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
@@ -755,6 +756,54 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
+
+    // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
+    if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+      Value *NewAnd = Builder->CreateAnd(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+    }
+
+    // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
+    if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+      Value *NewOr = Builder->CreateOr(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+    }
+  }
+
+  // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+  // where CMAX is the all ones value for the truncated type,
+  // iff the lower bits of C2 and CA are zero.
+  if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+      LHS->hasOneUse() && RHS->hasOneUse()) {
+    Value *V;
+    ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+
+    // (trunc x) == C1 & (and x, CA) == C2
+    if (match(Val2, m_Trunc(m_Value(V))) &&
+        match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+      SmallCst = RHSCst;
+      BigCst = LHSCst;
+    }
+    // (and x, CA) == C2 & (trunc x) == C1
+    else if (match(Val, m_Trunc(m_Value(V))) &&
+             match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+      SmallCst = LHSCst;
+      BigCst = RHSCst;
+    }
+
+    if (SmallCst && BigCst) {
+      unsigned BigBitSize = BigCst->getType()->getBitWidth();
+      unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+
+      // Check that the low bits are zero.
+      APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+      if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
+        Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
+        APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
+        Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
+        return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+      }
+    }
   }
   
   // From here on, we only handle:
@@ -767,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
       RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
     return 0;
-  
+
+  // Make a constant range that's the intersection of the two icmp ranges.
+  // If the intersection is empty, we know that the result is false.
+  ConstantRange LHSRange = 
+    ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+  ConstantRange RHSRange = 
+    ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+
+  if (LHSRange.intersectWith(RHSRange).isEmptySet())
+    return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+
   // We can't fold (ugt x, C) & (sgt x, C2).
   if (!PredicatesFoldable(LHSCC, RHSCC))
     return 0;
@@ -800,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   case ICmpInst::ICMP_EQ:
     switch (RHSCC) {
     default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
-    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
-    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
     case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
     case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
@@ -851,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   case ICmpInst::ICMP_SLT:
     switch (RHSCC) {
     default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
-    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
     case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
       break;
     case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
@@ -1438,6 +1490,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
+
+    // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
+    if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
+      Value *NewOr = Builder->CreateOr(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+    }
+
+    // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
+    if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
+      Value *NewAnd = Builder->CreateAnd(Val, Val2);
+      return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
+    }
   }
 
   // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
@@ -1975,7 +2039,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       }
     }
   }
-  
+
+  // or(sext(A), B) -> A ? -1 : B where A is an i1
+  // or(A, sext(B)) -> B ? -1 : A where B is an i1
+  if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+    return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+  if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1))
+    return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+
   // Note: If we've gotten to the point of visiting the outer OR, then the
   // inner one couldn't be simplified.  If it was a constant, then it won't
   // be simplified by a later pass either, so we try swapping the inner/outer
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0e46450..726105f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -475,7 +475,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       }
     }
     break;
-  case Intrinsic::umul_with_overflow:
+  case Intrinsic::umul_with_overflow: {
+    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+    APInt Mask = APInt::getAllOnesValue(BitWidth);
+
+    APInt LHSKnownZero(BitWidth, 0);
+    APInt LHSKnownOne(BitWidth, 0);
+    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    APInt RHSKnownZero(BitWidth, 0);
+    APInt RHSKnownOne(BitWidth, 0);
+    ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+    // Get the largest possible values for each operand.
+    APInt LHSMax = ~LHSKnownZero;
+    APInt RHSMax = ~RHSKnownZero;
+
+    // If multiplying the maximum values does not overflow then we can turn
+    // this into a plain NUW mul.
+    bool Overflow;
+    LHSMax.umul_ov(RHSMax, Overflow);
+    if (!Overflow) {
+      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+      Constant *V[] = {
+        UndefValue::get(LHS->getType()),
+        Builder->getFalse()
+      };
+      Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+      return InsertValueInst::Create(Struct, Mul, 0);
+    }
+  } // FALL THROUGH
   case Intrinsic::smul_with_overflow:
     // Canonicalize constants into the RHS.
     if (isa<Constant>(II->getArgOperand(0)) &&
@@ -508,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
-  case Intrinsic::x86_sse_loadu_ps:
-  case Intrinsic::x86_sse2_loadu_pd:
-  case Intrinsic::x86_sse2_loadu_dq:
-    // Turn PPC lvx     -> load if the pointer is known aligned.
-    // Turn X86 loadups -> load if the pointer is known aligned.
+    // Turn PPC lvx -> load if the pointer is known aligned.
     if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
@@ -731,9 +756,13 @@ protected:
                            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
       if (SizeCI->isAllOnesValue())
         return true;
-      if (isString)
-        return SizeCI->getZExtValue() >=
-               GetStringLength(CI->getArgOperand(SizeArgOp));
+      if (isString) {
+        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+        // If the length is 0 we don't know how long it is and so we can't
+        // remove the check.
+        if (Len == 0) return false;
+        return SizeCI->getZExtValue() >= Len;
+      }
       if (ConstantInt *Arg = dyn_cast<ConstantInt>(
                                                   CI->getArgOperand(SizeArgOp)))
         return SizeCI->getZExtValue() >= Arg->getZExtValue();
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b432641..6f70de8 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -87,10 +87,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
 
   // If the allocation has multiple uses, only promote it if we are strictly
   // increasing the alignment of the resultant allocation.  If we keep it the
-  // same, we open the door to infinite loops of various kinds.  (A reference
-  // from a dbg.declare doesn't count as a use for this purpose.)
-  if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
-      CastElTyAlign == AllocElTyAlign) return 0;
+  // same, we open the door to infinite loops of various kinds.
+  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
 
   uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
   uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
@@ -128,15 +126,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   New->setAlignment(AI.getAlignment());
   New->takeName(&AI);
   
-  // If the allocation has one real use plus a dbg.declare, just remove the
-  // declare.
-  if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
-    EraseInstFromFunction(*(Instruction*)DI);
-  }
   // If the allocation has multiple real uses, insert a cast and change all
   // things that used it to use the new cast.  This will also hack on CI, but it
   // will die soon.
-  else if (!AI.hasOneUse()) {
+  if (!AI.hasOneUse()) {
     // New is the allocation instruction, pointer typed. AI is the original
     // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
     Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
@@ -203,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   }
   case Instruction::PHI: {
     PHINode *OPN = cast<PHINode>(I);
-    PHINode *NPN = PHINode::Create(Ty);
+    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
     for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
       Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
       NPN->addIncoming(V, OPN->getIncomingBlock(i));
@@ -883,6 +876,102 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   return 0;
 }
 
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+    // (x <s  0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
+    // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  -> all ones if positive
+    if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+        (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+      Value *Sh = ConstantInt::get(Op0->getType(),
+                                   Op0->getType()->getScalarSizeInBits()-1);
+      Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+      if (In->getType() != CI.getType())
+        In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+
+      if (Pred == ICmpInst::ICMP_SGT)
+        In = Builder->CreateNot(In, In->getName()+".not");
+      return ReplaceInstUsesWith(CI, In);
+    }
+
+    // If we know that only one bit of the LHS of the icmp can be set and we
+    // have an equality comparison with zero or a power of 2, we can transform
+    // the icmp and sext into bitwise/integer operations.
+    if (ICI->hasOneUse() &&
+        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+      unsigned BitWidth = Op1C->getType()->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+      ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+
+      APInt KnownZeroMask(~KnownZero);
+      if (KnownZeroMask.isPowerOf2()) {
+        Value *In = ICI->getOperand(0);
+
+        // If the icmp tests for a known zero bit we can constant fold it.
+        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+          Value *V = Pred == ICmpInst::ICMP_NE ?
+                       ConstantInt::getAllOnesValue(CI.getType()) :
+                       ConstantInt::getNullValue(CI.getType());
+          return ReplaceInstUsesWith(CI, V);
+        }
+
+        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
+          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+          unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+          // Perform a right shift to place the desired bit in the LSB.
+          if (ShiftAmt)
+            In = Builder->CreateLShr(In,
+                                     ConstantInt::get(In->getType(), ShiftAmt));
+
+          // At this point "In" is either 1 or 0. Subtract 1 to turn
+          // {1, 0} -> {0, -1}.
+          In = Builder->CreateAdd(In,
+                                  ConstantInt::getAllOnesValue(In->getType()),
+                                  "sext");
+        } else {
+          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
+          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+          unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+          // Perform a left shift to place the desired bit in the MSB.
+          if (ShiftAmt)
+            In = Builder->CreateShl(In,
+                                    ConstantInt::get(In->getType(), ShiftAmt));
+
+          // Distribute the bit over the whole bit width.
+          In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+                                                        BitWidth - 1), "sext");
+        }
+
+        if (CI.getType() == In->getType())
+          return ReplaceInstUsesWith(CI, In);
+        return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+      }
+    }
+  }
+
+  // vector (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed.
+  if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+    if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+        Op0->getType() == CI.getType()) {
+      const Type *EltTy = VTy->getElementType();
+
+      // splat the shift constant to a constant vector.
+      Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+      Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+      return ReplaceInstUsesWith(CI, In);
+    }
+  }
+
+  return 0;
+}
+
 /// CanEvaluateSExtd - Return true if we can take the specified value
 /// and return it as type Ty without inserting any new casts and without
 /// changing the value of the common low bits.  This is used by code that tries
@@ -1006,44 +1095,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
       Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
       return BinaryOperator::CreateAShr(Res, ShAmt);
     }
-  
-  
-  // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
-  // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
-  {
-  ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
-  if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
-    // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
-    // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
-    if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
-        (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
-      Value *Sh = ConstantInt::get(CmpLHS->getType(),
-                                   CmpLHS->getType()->getScalarSizeInBits()-1);
-      Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
-      if (In->getType() != CI.getType())
-        In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
-      
-      if (Pred == ICmpInst::ICMP_SGT)
-        In = Builder->CreateNot(In, In->getName()+".not");
-      return ReplaceInstUsesWith(CI, In);
-    }
-  }
-  }
 
-  // vector (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed.
-  if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
-    ICmpInst::Predicate Pred; Value *CmpLHS;
-    if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
-      if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
-        const Type *EltTy = VTy->getElementType();
-
-        // splat the shift constant to a constant vector.
-        Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
-        Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
-        return ReplaceInstUsesWith(CI, In);
-      }
-    }
-  }
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+    return transformSExtICmp(ICI, CI);
 
   // If the input is a shl/ashr pair of a same constant, then this is a sign
   // extension from a smaller value.  If we could trust arbitrary bitwidth
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 999de34..bb9b88b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -699,7 +699,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
     return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
 
   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
-  // so the values can never be equal.  Similiarly for all other "or equals"
+  // so the values can never be equal.  Similarly for all other "or equals"
   // operators.
   
   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
@@ -1289,13 +1289,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
   }
     
   case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
-  case Instruction::AShr:
-    // Only handle equality comparisons of shift-by-constant.
-    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
-      if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI),
-                                            ShAmt))
+  case Instruction::AShr: {
+    // Handle equality comparisons of shift-by-constant.
+    BinaryOperator *BO = cast<BinaryOperator>(LHSI);
+    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
         return Res;
+    }
+
+    // Handle exact shr's.
+    if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
+      if (RHSV.isMinValue())
+        return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+    }
     break;
+  }
     
   case Instruction::SDiv:
   case Instruction::UDiv:
@@ -1376,9 +1384,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           
           if (Value *NegVal = dyn_castNegVal(BOp1))
             return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
-          else if (Value *NegVal = dyn_castNegVal(BOp0))
+          if (Value *NegVal = dyn_castNegVal(BOp0))
             return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
-          else if (BO->hasOneUse()) {
+          if (BO->hasOneUse()) {
             Value *Neg = Builder->CreateNeg(BOp1);
             Neg->takeName(BO);
             return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
@@ -1855,11 +1863,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
                           ConstantInt::get(CI->getContext(), CI->getValue()+1));
     case ICmpInst::ICMP_UGE:
-      assert(!CI->isMinValue(false));                  // A >=u MIN -> TRUE
+      assert(!CI->isMinValue(false));                 // A >=u MIN -> TRUE
       return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
                           ConstantInt::get(CI->getContext(), CI->getValue()-1));
     case ICmpInst::ICMP_SGE:
-      assert(!CI->isMinValue(true));                   // A >=s MIN -> TRUE
+      assert(!CI->isMinValue(true));                  // A >=s MIN -> TRUE
       return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
                           ConstantInt::get(CI->getContext(), CI->getValue()-1));
     }
@@ -1907,18 +1915,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     // that code below can assume that Min != Max.
     if (!isa<Constant>(Op0) && Op0Min == Op0Max)
       return new ICmpInst(I.getPredicate(),
-                          ConstantInt::get(I.getContext(), Op0Min), Op1);
+                          ConstantInt::get(Op0->getType(), Op0Min), Op1);
     if (!isa<Constant>(Op1) && Op1Min == Op1Max)
       return new ICmpInst(I.getPredicate(), Op0,
-                          ConstantInt::get(I.getContext(), Op1Min));
+                          ConstantInt::get(Op1->getType(), Op1Min));
 
     // Based on the range information we know about the LHS, see if we can
-    // simplify this comparison.  For example, (x&4) < 8  is always true.
+    // simplify this comparison.  For example, (x&4) < 8 is always true.
     switch (I.getPredicate()) {
     default: llvm_unreachable("Unknown icmp opcode!");
     case ICmpInst::ICMP_EQ: {
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
         
       // If all bits are known zero except for one, then we know at most one
       // bit is set.   If the comparison is against zero, then this is a check
@@ -1955,7 +1963,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
     case ICmpInst::ICMP_NE: {
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       
       // If all bits are known zero except for one, then we know at most one
       // bit is set.   If the comparison is against zero, then this is a check
@@ -1992,9 +2000,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
     case ICmpInst::ICMP_ULT:
       if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
@@ -2010,9 +2018,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       break;
     case ICmpInst::ICMP_UGT:
       if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
 
       if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
@@ -2029,9 +2037,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       break;
     case ICmpInst::ICMP_SLT:
       if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(C)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(C)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
@@ -2042,9 +2050,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       break;
     case ICmpInst::ICMP_SGT:
       if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
 
       if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
         return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
@@ -2057,30 +2065,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     case ICmpInst::ICMP_SGE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
       if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       break;
     case ICmpInst::ICMP_SLE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
       if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Min.sgt(Op1Max))          // A <=s B -> false if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       break;
     case ICmpInst::ICMP_UGE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
       if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       break;
     case ICmpInst::ICMP_ULE:
       assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
       if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
       if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
       break;
     }
 
@@ -2306,6 +2314,35 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         BO0->hasOneUse() && BO1->hasOneUse())
       return new ICmpInst(Pred, D, B);
 
+    BinaryOperator *SRem = NULL;
+    // icmp (srem X, Y), Y
+    if (BO0 && BO0->getOpcode() == Instruction::SRem &&
+        Op1 == BO0->getOperand(1))
+      SRem = BO0;
+    // icmp Y, (srem X, Y)
+    else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+             Op0 == BO1->getOperand(1))
+      SRem = BO1;
+    if (SRem) {
+      // We don't check hasOneUse to avoid increasing register pressure because
+      // the value we use is the same value this instruction was already using.
+      switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+        default: break;
+        case ICmpInst::ICMP_EQ:
+          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+        case ICmpInst::ICMP_NE:
+          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+        case ICmpInst::ICMP_SGT:
+        case ICmpInst::ICMP_SGE:
+          return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+                              Constant::getAllOnesValue(SRem->getType()));
+        case ICmpInst::ICMP_SLT:
+        case ICmpInst::ICMP_SLE:
+          return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+                              Constant::getNullValue(SRem->getType()));
+      }
+    }
+
     if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
         BO0->hasOneUse() && BO1->hasOneUse() &&
         BO0->getOperand(1) == BO1->getOperand(1)) {
@@ -2356,6 +2393,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
           }
         }
         break;
+      case Instruction::UDiv:
+      case Instruction::LShr:
+        if (I.isSigned())
+          break;
+        // fall-through
+      case Instruction::SDiv:
+      case Instruction::AShr:
+        if (!BO0->isExact() && !BO1->isExact())
+          break;
+        return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+                            BO1->getOperand(0));
+      case Instruction::Shl: {
+        bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+        bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+        if (!NUW && !NSW)
+          break;
+        if (!NSW && I.isSigned())
+          break;
+        return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+                            BO1->getOperand(0));
+      }
       }
     }
   }
@@ -2425,9 +2483,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
 
     // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
-    if (Op0->hasOneUse() && Op1->hasOneUse() &&
-        match(Op0, m_And(m_Value(A), m_Value(B))) && 
-        match(Op1, m_And(m_Value(C), m_Value(D)))) {
+    if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && 
+        match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
       Value *X = 0, *Y = 0, *Z = 0;
       
       if (A == C) {
@@ -2448,6 +2505,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         return &I;
       }
     }
+    
+    // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+    // "icmp (and X, mask), cst"
+    uint64_t ShAmt = 0;
+    ConstantInt *Cst1;
+    if (Op0->hasOneUse() &&
+        match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
+                                           m_ConstantInt(ShAmt))))) &&
+        match(Op1, m_ConstantInt(Cst1)) &&
+        // Only do this when A has multiple uses.  This is most important to do
+        // when it exposes other optimizations.
+        !A->hasOneUse()) {
+      unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+      
+      if (ShAmt < ASize) {
+        APInt MaskV =
+          APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+        MaskV <<= ShAmt;
+        
+        APInt CmpV = Cst1->getValue().zext(ASize);
+        CmpV <<= ShAmt;
+        
+        Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+        return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
+      }
+    }
   }
   
   {
@@ -2704,6 +2787,42 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
     if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
       switch (LHSI->getOpcode()) {
+      case Instruction::FPExt: {
+        // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
+        FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
+        ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
+        if (!RHSF)
+          break;
+
+        // We can't convert a PPC double double.
+        if (RHSF->getType()->isPPC_FP128Ty())
+          break;
+
+        const fltSemantics *Sem;
+        // FIXME: This shouldn't be here.
+        if (LHSExt->getSrcTy()->isFloatTy())
+          Sem = &APFloat::IEEEsingle;
+        else if (LHSExt->getSrcTy()->isDoubleTy())
+          Sem = &APFloat::IEEEdouble;
+        else if (LHSExt->getSrcTy()->isFP128Ty())
+          Sem = &APFloat::IEEEquad;
+        else if (LHSExt->getSrcTy()->isX86_FP80Ty())
+          Sem = &APFloat::x87DoubleExtended;
+        else
+          break;
+
+        bool Lossy;
+        APFloat F = RHSF->getValueAPF();
+        F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
+
+        // Avoid lossy conversions and denormals.
+        if (!Lossy &&
+            F.compare(APFloat::getSmallestNormalized(*Sem)) !=
+                                                           APFloat::cmpLessThan)
+          return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+                              ConstantFP::get(RHSC->getContext(), F));
+        break;
+      }
       case Instruction::PHI:
         // Only fold fcmp into the PHI if the phi and fcmp are in the same
         // block.  If in the same block, we're encouraging jump threading.  If
@@ -2742,6 +2861,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
           return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
         break;
       }
+      case Instruction::FSub: {
+        // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
+        Value *Op;
+        if (match(LHSI, m_FNeg(m_Value(Op))))
+          return new FCmpInst(I.getSwappedPredicate(), Op,
+                              ConstantExpr::getFNeg(RHSC));
+        break;
+      }
       case Instruction::Load:
         if (GetElementPtrInst *GEP =
             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
@@ -2755,5 +2882,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
       }
   }
 
+  // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
+  Value *X, *Y;
+  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+    return new FCmpInst(I.getSwappedPredicate(), X, Y);
+
+  // fcmp (fpext x), (fpext y) -> fcmp x, y
+  if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
+    if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
+      if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
+        return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
+                            RHSExt->getOperand(0));
+
   return Changed ? &I : 0;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 78ff734..432adc9 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -364,34 +364,12 @@ static bool equivalentAddressValues(Value *A, Value *B) {
   return false;
 }
 
-// If this instruction has two uses, one of which is a llvm.dbg.declare,
-// return the llvm.dbg.declare.
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
-  if (!V->hasNUses(2))
-    return 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
-    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
-      return DI;
-    if (isa<BitCastInst>(U) && U->hasOneUse()) {
-      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin()))
-        return DI;
-      }
-  }
-  return 0;
-}
-
 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   Value *Val = SI.getOperand(0);
   Value *Ptr = SI.getOperand(1);
 
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
-  // If the RHS is an alloca with a two uses, the other one being a 
-  // llvm.dbg.declare, zapify the store and the declare, making the
-  // alloca dead.  We must do this to prevent declares from affecting
-  // codegen.
   if (!SI.isVolatile()) {
     if (Ptr->hasOneUse()) {
       if (isa<AllocaInst>(Ptr)) 
@@ -400,17 +378,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
         if (isa<AllocaInst>(GEP->getOperand(0))) {
           if (GEP->getOperand(0)->hasOneUse())
             return EraseInstFromFunction(SI);
-          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
-            EraseInstFromFunction(*DI);
-            return EraseInstFromFunction(SI);
-          }
         }
       }
     }
-    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
-      EraseInstFromFunction(*DI);
-      return EraseInstFromFunction(SI);
-    }
   }
 
   // Attempt to improve the alignment.
@@ -621,8 +591,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   // Insert a PHI node now if we need it.
   Value *MergedVal = OtherStore->getOperand(0);
   if (MergedVal != SI.getOperand(0)) {
-    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
-    PN->reserveOperandSpace(2);
+    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
     PN->addIncoming(SI.getOperand(0), SI.getParent());
     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
     MergedVal = InsertNewInstBefore(PN, DestBB->front());
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d1a1fd6..57fb08a 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -320,6 +320,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     }
   }
 
+  // See if we can fold away this div instruction.
+  if (SimplifyDemandedInstructionBits(I))
+    return &I;
+
   // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
   Value *X = 0, *Z = 0;
   if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
@@ -332,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   return 0;
 }
 
+/// dyn_castZExtVal - Checks if V is a zext or constant that can
+/// be truncated to Ty without losing bits.
+static Value *dyn_castZExtVal(Value *V, const Type *Ty) {
+  if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
+    if (Z->getSrcTy() == Ty)
+      return Z->getOperand(0);
+  } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
+    if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
+      return ConstantExpr::getTrunc(C, Ty);
+  }
+  return 0;
+}
+
 Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
@@ -390,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       return SelectInst::Create(Cond, TSI, FSI);
     }
   }
+
+  // (zext A) udiv (zext B) --> zext (A udiv B)
+  if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+    if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+      return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
+                                              I.isExact()),
+                          I.getType());
+
   return 0;
 }
 
@@ -452,27 +477,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
     return ReplaceInstUsesWith(I, V);
 
-  return 0;
-}
+  if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+    const APFloat &Op1F = Op1C->getValueAPF();
 
-/// This function implements the transforms on rem instructions that work
-/// regardless of the kind of rem instruction it is (urem, srem, or frem). It 
-/// is used by the visitors to those instructions.
-/// @brief Transforms common to all three rem instructions
-Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (isa<UndefValue>(Op0)) {             // undef % X -> 0
-    if (I.getType()->isFPOrFPVectorTy())
-      return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN)
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+    // If the divisor has an exact multiplicative inverse we can turn the fdiv
+    // into a cheaper fmul.
+    APFloat Reciprocal(Op1F.getSemantics());
+    if (Op1F.getExactInverse(&Reciprocal)) {
+      ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
+      return BinaryOperator::CreateFMul(Op0, RFP);
+    }
   }
-  if (isa<UndefValue>(Op1))
-    return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef
-
-  // Handle cases involving: rem X, (select Cond, Y, Z)
-  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
-    return &I;
 
   return 0;
 }
@@ -484,26 +499,11 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
 Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Instruction *common = commonRemTransforms(I))
-    return common;
-
-  // X % X == 0
-  if (Op0 == Op1)
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
-  // 0 % X == 0 for integer, we don't need to preserve faults!
-  if (Constant *LHS = dyn_cast<Constant>(Op0))
-    if (LHS->isNullValue())
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  // Handle cases involving: rem X, (select Cond, Y, Z)
+  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+    return &I;
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // X % 0 == undef, we don't need to preserve faults!
-    if (RHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-    
-    if (RHS->equalsInt(1))  // X % 1 == 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
     if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
       if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
         if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -525,6 +525,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
 Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
   if (Instruction *common = commonIRemTransforms(I))
     return common;
   
@@ -552,13 +555,22 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
       return SelectInst::Create(Cond, TrueAnd, FalseAnd);
     }
   }
-  
+
+  // (zext A) urem (zext B) --> zext (A urem B)
+  if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
+    if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
+      return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+                          I.getType());
+
   return 0;
 }
 
 Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
   // Handle the integer rem common cases
   if (Instruction *Common = commonIRemTransforms(I))
     return Common;
@@ -617,6 +629,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
 }
 
 Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
-  return commonRemTransforms(I);
-}
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // Handle cases involving: rem X, (select Cond, Y, Z)
+  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+    return &I;
 
+  return 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 297a18c..abf61bb 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -80,18 +80,16 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   Value *InRHS = FirstInst->getOperand(1);
   PHINode *NewLHS = 0, *NewRHS = 0;
   if (LHSVal == 0) {
-    NewLHS = PHINode::Create(LHSType,
+    NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
                              FirstInst->getOperand(0)->getName() + ".pn");
-    NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
     NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
     InsertNewInstBefore(NewLHS, PN);
     LHSVal = NewLHS;
   }
   
   if (RHSVal == 0) {
-    NewRHS = PHINode::Create(RHSType,
+    NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
                              FirstInst->getOperand(1)->getName() + ".pn");
-    NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
     NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
     InsertNewInstBefore(NewRHS, PN);
     RHSVal = NewRHS;
@@ -202,11 +200,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
     if (FixedOperands[i]) continue;  // operand doesn't need a phi.
     Value *FirstOp = FirstInst->getOperand(i);
-    PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+    PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
                                      FirstOp->getName()+".pn");
     InsertNewInstBefore(NewPN, PN);
     
-    NewPN->reserveOperandSpace(e);
     NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
     OperandPhis[i] = NewPN;
     FixedOperands[i] = NewPN;
@@ -240,7 +237,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
 /// obvious the value of the load is not changed from the point of the load to
 /// the end of the block it is in.
 ///
-/// Finally, it is safe, but not profitable, to sink a load targetting a
+/// Finally, it is safe, but not profitable, to sink a load targeting a
 /// non-address-taken alloca.  Doing so will cause us to not promote the alloca
 /// to a register.
 static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
@@ -340,8 +337,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+                                   PN.getNumIncomingValues(),
                                    PN.getName()+".in");
-  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
   
   Value *InVal = FirstLI->getOperand(0);
   NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
@@ -446,8 +443,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+                                   PN.getNumIncomingValues(),
                                    PN.getName()+".in");
-  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
 
   Value *InVal = FirstInst->getOperand(0);
   NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
@@ -699,7 +696,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
     if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
       
       // Otherwise, Create the new PHI node for this user.
-      EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+      EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
+                               PN->getName()+".off"+Twine(Offset), PN);
       assert(EltPHI->getType() != PN->getType() &&
              "Truncate didn't shrink phi?");
     
@@ -776,9 +774,6 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
-  // If LCSSA is around, don't mess with Phi nodes
-  if (MustPreserveLCSSA) return 0;
-
   if (Value *V = SimplifyInstruction(&PN, TD))
     return ReplaceInstUsesWith(PN, V);
 
@@ -826,18 +821,18 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
   // quick check to see if the PHI node only contains a single non-phi value, if
   // so, scan to see if the phi cycle is actually equal to that value.
   {
-    unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+    unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
     // Scan for the first non-phi operand.
-    while (InValNo != NumOperandVals && 
+    while (InValNo != NumIncomingVals &&
            isa<PHINode>(PN.getIncomingValue(InValNo)))
       ++InValNo;
 
-    if (InValNo != NumOperandVals) {
-      Value *NonPhiInVal = PN.getOperand(InValNo);
+    if (InValNo != NumIncomingVals) {
+      Value *NonPhiInVal = PN.getIncomingValue(InValNo);
       
       // Scan the rest of the operands to see if there are any conflicts, if so
       // there is no need to recursively scan other phis.
-      for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+      for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
         Value *OpVal = PN.getIncomingValue(InValNo);
         if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
           break;
@@ -846,7 +841,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       // If we scanned over all operands, then we have one unique value plus
       // phi values.  Scan PHI nodes to see if they all merge in each other or
       // the value.
-      if (InValNo == NumOperandVals) {
+      if (InValNo == NumIncomingVals) {
         SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
         if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
           return ReplaceInstUsesWith(PN, NonPhiInVal);
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 97abc76..61a433a 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -214,7 +214,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         unsigned OpToFold = 0;
         if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
           OpToFold = 1;
-        } else  if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+        } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
           OpToFold = 2;
         }
 
@@ -227,9 +227,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
             Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
             InsertNewInstBefore(NewSel, SI);
             NewSel->takeName(TVI);
-            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
-              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
-            llvm_unreachable("Unknown instruction!!");
+            BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
+            BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
+                                                        FalseVal, NewSel);
+            if (isa<PossiblyExactOperator>(BO))
+              BO->setIsExact(TVI_BO->isExact());
+            if (isa<OverflowingBinaryOperator>(BO)) {
+              BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
+              BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
+            }
+            return BO;
           }
         }
       }
@@ -243,7 +250,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         unsigned OpToFold = 0;
         if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
           OpToFold = 1;
-        } else  if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+        } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
           OpToFold = 2;
         }
 
@@ -256,9 +263,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
             Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
             InsertNewInstBefore(NewSel, SI);
             NewSel->takeName(FVI);
-            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
-              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
-            llvm_unreachable("Unknown instruction!!");
+            BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
+            BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
+                                                        TrueVal, NewSel);
+            if (isa<PossiblyExactOperator>(BO))
+              BO->setIsExact(FVI_BO->isExact());
+            if (isa<OverflowingBinaryOperator>(BO)) {
+              BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
+              BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
+            }
+            return BO;
           }
         }
       }
@@ -424,6 +438,19 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
       return ReplaceInstUsesWith(SI, TrueVal);
     /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
   }
+
+  if (isa<Constant>(CmpRHS)) {
+    if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
+      // Transform (X == C) ? X : Y -> (X == C) ? C : Y
+      SI.setOperand(1, CmpRHS);
+      Changed = true;
+    } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
+      // Transform (X != C) ? Y : X -> (X != C) ? Y : C
+      SI.setOperand(2, CmpRHS);
+      Changed = true;
+    }
+  }
+
   return Changed ? &SI : 0;
 }
 
@@ -503,9 +530,8 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
   if (!IC || !IC->isEquality())
     return 0;
 
-  if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1)))
-    if (!C->isZero())
-      return 0;
+  if (!match(IC->getOperand(1), m_Zero()))
+    return 0;
 
   ConstantInt *AndRHS;
   Value *LHS = IC->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index a7f8005..811f949 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -644,7 +644,14 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
       return &I;
     }
   }
-  
+
+  // (C1 << A) << C2 -> (C1 << C2) << A
+  Constant *C1, *C2;
+  Value *A;
+  if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
+      match(I.getOperand(1), m_Constant(C2)))
+    return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+
   return 0;    
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index bda8cea..6e727ce 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -684,6 +684,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::SRem:
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      // X % -1 demands all the bits because we don't want to introduce
+      // INT_MIN % -1 (== undef) by accident.
+      if (Rem->isAllOnesValue())
+        break;
       APInt RA = Rem->getValue().abs();
       if (RA.isPowerOf2()) {
         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
@@ -712,6 +716,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
       }
     }
+
+    // The sign bit is the LHS's sign bit, except when the result of the
+    // remainder is zero.
+    if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
+      APInt Mask2 = APInt::getSignBit(BitWidth);
+      APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+      ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
+                        Depth+1);
+      // If it's known zero, our sign bit is also zero.
+      if (LHSKnownZero.isNegative())
+        KnownZero |= LHSKnownZero;
+    }
     break;
   case Instruction::URem: {
     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 5caa12d..ad6a8d0 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
                                           ConstantInt::get(Int32Ty,
                                                            SrcIdx, false));
       }
+    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+      // Canonicalize extractelement(cast) -> cast(extractelement)
+      // bitcasts can change the number of vector elements and they cost nothing
+      if (CI->hasOneUse() && EI.hasOneUse() &&
+          (CI->getOpcode() != Instruction::BitCast)) {
+        Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+                                                  EI.getIndexOperand());
+        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+      }
     }
-    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
   }
   return 0;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 9100a85..32009c3 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -53,6 +53,7 @@ public:
   void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
     assert(Worklist.empty() && "Worklist must be empty to add initial group");
     Worklist.reserve(NumEntries+16);
+    WorklistMap.resize(NumEntries);
     DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
     for (; NumEntries; --NumEntries) {
       Instruction *I = List[NumEntries-1];
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 37123d0..7a84598 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -76,7 +76,6 @@ INITIALIZE_PASS(InstCombiner, "instcombine",
                 "Combine redundant instructions", false, false)
 
 void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addPreservedID(LCSSAID);
   AU.setPreservesCFG();
 }
 
@@ -600,8 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   }
 
   // Okay, we can do the transformation: create the new PHI node.
-  PHINode *NewPN = PHINode::Create(I.getType(), "");
-  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), "");
   InsertNewInstBefore(NewPN, *PN);
   NewPN->takeName(PN);
   
@@ -850,22 +848,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
                                   Indices.end(), GEP.getName());
   }
-  
+
   // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
   Value *StrippedPtr = PtrOp->stripPointerCasts();
-  if (StrippedPtr != PtrOp) {
-    const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+  const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+  if (StrippedPtr != PtrOp &&
+    StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
 
     bool HasZeroPointerIndex = false;
     if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
-    
+
     // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
     // into     : GEP [10 x i8]* X, i32 0, ...
     //
     // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
     //           into     : GEP i8* X, ...
-    // 
+    //
     // This occurs when the program declares an array extern like "int X[];"
     if (HasZeroPointerIndex) {
       const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
@@ -976,7 +975,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       }
     }
   }
-  
+
   /// See if we can simplify:
   ///   X = bitcast A* to B*
   ///   Y = gep X, <...constant indices...>
@@ -984,12 +983,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
   if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
     if (TD &&
-        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+        StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+
       // Determine how much the GEP moves the pointer.  We are guaranteed to get
       // a constant back from EmitGEPOffset.
       ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
       int64_t Offset = OffsetV->getSExtValue();
-      
+
       // If this GEP instruction doesn't move the pointer, just replace the GEP
       // with a bitcast of the real input to the dest type.
       if (Offset == 0) {
@@ -1635,7 +1636,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
 
 
 bool InstCombiner::runOnFunction(Function &F) {
-  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
   TD = getAnalysisIfAvailable<TargetData>();
 
   
@@ -1648,6 +1648,10 @@ bool InstCombiner::runOnFunction(Function &F) {
   
   bool EverMadeChange = false;
 
+  // Lower dbg.declare intrinsics otherwise their value may be clobbered
+  // by instcombiner.
+  EverMadeChange = LowerDbgDeclare(F);
+
   // Iterate while there is work to do.
   unsigned Iteration = 0;
   while (DoOneIteration(F, Iteration++))
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 0ac1cb0..5700ac8 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMInstrumentation
   EdgeProfiling.cpp
+  GCOVProfiling.cpp
   Instrumentation.cpp
   OptimalEdgeProfiling.cpp
   PathProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 0000000..2425342
--- /dev/null
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,638 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+#include "ProfilingUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+#include <utility>
+using namespace llvm;
+
+namespace {
+  class GCOVProfiler : public ModulePass {
+    bool runOnModule(Module &M);
+  public:
+    static char ID;
+    GCOVProfiler()
+        : ModulePass(ID), EmitNotes(true), EmitData(true) {
+      initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+    }
+    GCOVProfiler(bool EmitNotes, bool EmitData)
+        : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData) {
+      assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+      initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+    }
+    virtual const char *getPassName() const {
+      return "GCOV Profiler";
+    }
+
+  private:
+    // Create the GCNO files for the Module based on DebugInfo.
+    void emitGCNO(DebugInfoFinder &DIF);
+
+    // Modify the program to track transitions along edges and call into the
+    // profiling runtime to emit .gcda files when run.
+    bool emitProfileArcs(DebugInfoFinder &DIF);
+
+    // Get pointers to the functions in the runtime library.
+    Constant *getStartFileFunc();
+    Constant *getIncrementIndirectCounterFunc();
+    Constant *getEmitFunctionFunc();
+    Constant *getEmitArcsFunc();
+    Constant *getEndFileFunc();
+
+    // Create or retrieve an i32 state value that is used to represent the
+    // pred block number for certain non-trivial edges.
+    GlobalVariable *getEdgeStateValue();
+
+    // Produce a table of pointers to counters, by predecessor and successor
+    // block number.
+    GlobalVariable *buildEdgeLookupTable(Function *F,
+                                         GlobalVariable *Counter,
+                                         const UniqueVector<BasicBlock *> &Preds,
+                                         const UniqueVector<BasicBlock *> &Succs);
+
+    // Add the function to write out all our counters to the global destructor
+    // list.
+    void insertCounterWriteout(DebugInfoFinder &,
+                               SmallVector<std::pair<GlobalVariable *,
+                                                     uint32_t>, 8> &);
+
+    bool EmitNotes;
+    bool EmitData;
+
+    Module *M;
+    LLVMContext *Ctx;
+  };
+}
+
+char GCOVProfiler::ID = 0;
+INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
+                "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) {
+  return new GCOVProfiler(EmitNotes, EmitData);
+}
+
+static DISubprogram findSubprogram(DIScope Scope) {
+  while (!Scope.isSubprogram()) {
+    assert(Scope.isLexicalBlock() &&
+           "Debug location not lexical block or subprogram");
+    Scope = DILexicalBlock(Scope).getContext();
+  }
+  return DISubprogram(Scope);
+}
+
+namespace {
+  class GCOVRecord {
+   protected:
+    static const char *LinesTag;
+    static const char *FunctionTag;
+    static const char *BlockTag;
+    static const char *EdgeTag;
+
+    GCOVRecord() {}
+
+    void writeBytes(const char *Bytes, int Size) {
+      os->write(Bytes, Size);
+    }
+
+    void write(uint32_t i) {
+      writeBytes(reinterpret_cast<char*>(&i), 4);
+    }
+
+    // Returns the length measured in 4-byte blocks that will be used to
+    // represent this string in a GCOV file
+    unsigned lengthOfGCOVString(StringRef s) {
+      // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs
+      // padding out to the next 4-byte word. The length is measured in 4-byte
+      // words including padding, not bytes of actual string.
+      return (s.size() + 5) / 4;
+    }
+
+    void writeGCOVString(StringRef s) {
+      uint32_t Len = lengthOfGCOVString(s);
+      write(Len);
+      writeBytes(s.data(), s.size());
+
+      // Write 1 to 4 bytes of NUL padding.
+      assert((unsigned)(4 - (s.size() % 4)) > 0);
+      assert((unsigned)(4 - (s.size() % 4)) <= 4);
+      writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+    }
+
+    raw_ostream *os;
+  };
+  const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+  const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+  const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+  const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+  class GCOVFunction;
+  class GCOVBlock;
+
+  // Constructed only by requesting it from a GCOVBlock, this object stores a
+  // list of line numbers and a single filename, representing lines that belong
+  // to the block.
+  class GCOVLines : public GCOVRecord {
+   public:
+    void addLine(uint32_t Line) {
+      Lines.push_back(Line);
+    }
+
+    uint32_t length() {
+      return lengthOfGCOVString(Filename) + 2 + Lines.size();
+    }
+
+   private:
+    friend class GCOVBlock;
+
+    GCOVLines(std::string Filename, raw_ostream *os)
+        : Filename(Filename) {
+      this->os = os;
+    }
+
+    std::string Filename;
+    SmallVector<uint32_t, 32> Lines;
+  };
+
+  // Represent a basic block in GCOV. Each block has a unique number in the
+  // function, number of lines belonging to each block, and a set of edges to
+  // other blocks.
+  class GCOVBlock : public GCOVRecord {
+   public:
+    GCOVLines &getFile(std::string Filename) {
+      GCOVLines *&Lines = LinesByFile[Filename];
+      if (!Lines) {
+        Lines = new GCOVLines(Filename, os);
+      }
+      return *Lines;
+    }
+
+    void addEdge(GCOVBlock &Successor) {
+      OutEdges.push_back(&Successor);
+    }
+
+    void writeOut() {
+      uint32_t Len = 3;
+      for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+               E = LinesByFile.end(); I != E; ++I) {
+        Len += I->second->length();
+      }
+
+      writeBytes(LinesTag, 4);
+      write(Len);
+      write(Number);
+      for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+               E = LinesByFile.end(); I != E; ++I) {
+        write(0);
+        writeGCOVString(I->second->Filename);
+        for (int i = 0, e = I->second->Lines.size(); i != e; ++i) {
+          write(I->second->Lines[i]);
+        }
+      }
+      write(0);
+      write(0);
+    }
+
+    ~GCOVBlock() {
+      DeleteContainerSeconds(LinesByFile);
+    }
+
+   private:
+    friend class GCOVFunction;
+
+    GCOVBlock(uint32_t Number, raw_ostream *os)
+        : Number(Number) {
+      this->os = os;
+    }
+
+    uint32_t Number;
+    StringMap<GCOVLines *> LinesByFile;
+    SmallVector<GCOVBlock *, 4> OutEdges;
+  };
+
+  // A function has a unique identifier, a checksum (we leave as zero) and a
+  // set of blocks and a map of edges between blocks. This is the only GCOV
+  // object users can construct, the blocks and lines will be rooted here.
+  class GCOVFunction : public GCOVRecord {
+   public:
+    GCOVFunction(DISubprogram SP, raw_ostream *os) {
+      this->os = os;
+
+      Function *F = SP.getFunction();
+      uint32_t i = 0;
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        Blocks[BB] = new GCOVBlock(i++, os);
+      }
+      ReturnBlock = new GCOVBlock(i++, os);
+
+      writeBytes(FunctionTag, 4);
+      uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+          1 + lengthOfGCOVString(SP.getFilename()) + 1;
+      write(BlockLen);
+      uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
+      write(Ident);
+      write(0);  // checksum
+      writeGCOVString(SP.getName());
+      writeGCOVString(SP.getFilename());
+      write(SP.getLineNumber());
+    }
+
+    ~GCOVFunction() {
+      DeleteContainerSeconds(Blocks);
+      delete ReturnBlock;
+    }
+
+    GCOVBlock &getBlock(BasicBlock *BB) {
+      return *Blocks[BB];
+    }
+
+    GCOVBlock &getReturnBlock() {
+      return *ReturnBlock;
+    }
+
+    void writeOut() {
+      // Emit count of blocks.
+      writeBytes(BlockTag, 4);
+      write(Blocks.size() + 1);
+      for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+        write(0);  // No flags on our blocks.
+      }
+
+      // Emit edges between blocks.
+      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+               E = Blocks.end(); I != E; ++I) {
+        GCOVBlock &Block = *I->second;
+        if (Block.OutEdges.empty()) continue;
+
+        writeBytes(EdgeTag, 4);
+        write(Block.OutEdges.size() * 2 + 1);
+        write(Block.Number);
+        for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+          write(Block.OutEdges[i]->Number);
+          write(0);  // no flags
+        }
+      }
+
+      // Emit lines for each block.
+      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+               E = Blocks.end(); I != E; ++I) {
+        I->second->writeOut();
+      }
+    }
+
+   private:
+    DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+    GCOVBlock *ReturnBlock;
+  };
+}
+
+// Replace the stem of a file, or add one if missing.
+static std::string replaceStem(std::string OrigFilename, std::string NewStem) {
+  return (sys::path::stem(OrigFilename) + "." + NewStem).str();
+}
+
+bool GCOVProfiler::runOnModule(Module &M) {
+  this->M = &M;
+  Ctx = &M.getContext();
+
+  DebugInfoFinder DIF;
+  DIF.processModule(M);
+
+  if (EmitNotes) emitGCNO(DIF);
+  if (EmitData) return emitProfileArcs(DIF);
+  return false;
+}
+
+void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
+  DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
+  for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
+           E = DIF.compile_unit_end(); I != E; ++I) {
+    // Each compile unit gets its own .gcno file. This means that whether we run
+    // this pass over the original .o's as they're produced, or run it after
+    // LTO, we'll generate the same .gcno files.
+
+    DICompileUnit CU(*I);
+    raw_fd_ostream *&out = GcnoFiles[CU];
+    std::string ErrorInfo;
+    out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(),
+                             ErrorInfo, raw_fd_ostream::F_Binary);
+    out->write("oncg*404MVLL", 12);
+  }
+
+  for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+           SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+    DISubprogram SP(*SPI);
+    raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()];
+
+    Function *F = SP.getFunction();
+    if (!F) continue;
+    GCOVFunction Func(SP, os);
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      GCOVBlock &Block = Func.getBlock(BB);
+      TerminatorInst *TI = BB->getTerminator();
+      if (int successors = TI->getNumSuccessors()) {
+        for (int i = 0; i != successors; ++i) {
+          Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+        }
+      } else if (isa<ReturnInst>(TI)) {
+        Block.addEdge(Func.getReturnBlock());
+      }
+
+      uint32_t Line = 0;
+      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+        const DebugLoc &Loc = I->getDebugLoc();
+        if (Loc.isUnknown()) continue;
+        if (Line == Loc.getLine()) continue;
+        Line = Loc.getLine();
+        if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue;
+
+        GCOVLines &Lines = Block.getFile(SP.getFilename());
+        Lines.addLine(Loc.getLine());
+      }
+    }
+    Func.writeOut();
+  }
+
+  for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
+           I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
+    raw_fd_ostream *&out = I->second;
+    out->write("\0\0\0\0\0\0\0\0", 8);  // EOF
+    out->close();
+    delete out;
+  }
+}
+
+bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
+  if (DIF.subprogram_begin() == DIF.subprogram_end())
+    return false;
+
+  SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> CountersByIdent;
+  for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
+           SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
+    DISubprogram SP(*SPI);
+    Function *F = SP.getFunction();
+    if (!F) continue;
+
+    unsigned Edges = 0;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+      if (isa<ReturnInst>(TI))
+        ++Edges;
+      else
+        Edges += TI->getNumSuccessors();
+    }
+
+    const ArrayType *CounterTy =
+        ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+    GlobalVariable *Counters =
+        new GlobalVariable(*M, CounterTy, false,
+                           GlobalValue::InternalLinkage,
+                           Constant::getNullValue(CounterTy),
+                           "__llvm_gcov_ctr", 0, false, 0);
+    CountersByIdent.push_back(
+        std::make_pair(Counters, reinterpret_cast<intptr_t>((MDNode*)SP)));
+
+    UniqueVector<BasicBlock *> ComplexEdgePreds;
+    UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+    unsigned Edge = 0;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+      int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+      if (Successors) {
+        IRBuilder<> Builder(TI);
+
+        if (Successors == 1) {
+          Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+                                                              Edge);
+          Value *Count = Builder.CreateLoad(Counter);
+          Count = Builder.CreateAdd(Count,
+                                    ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+          Builder.CreateStore(Count, Counter);
+        } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+          Value *Sel = Builder.CreateSelect(
+              BI->getCondition(),
+              ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
+              ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+          SmallVector<Value *, 2> Idx;
+          Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+          Idx.push_back(Sel);
+          Value *Counter = Builder.CreateInBoundsGEP(Counters,
+                                                     Idx.begin(), Idx.end());
+          Value *Count = Builder.CreateLoad(Counter);
+          Count = Builder.CreateAdd(Count,
+                                    ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+          Builder.CreateStore(Count, Counter);
+        } else {
+          ComplexEdgePreds.insert(BB);
+          for (int i = 0; i != Successors; ++i)
+            ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+        }
+        Edge += Successors;
+      }
+    }
+
+    if (!ComplexEdgePreds.empty()) {
+      GlobalVariable *EdgeTable =
+          buildEdgeLookupTable(F, Counters,
+                               ComplexEdgePreds, ComplexEdgeSuccs);
+      GlobalVariable *EdgeState = getEdgeStateValue();
+
+      const Type *Int32Ty = Type::getInt32Ty(*Ctx);
+      for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+        IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+        Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+      }
+      for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+        // call runtime to perform increment
+        IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
+        Value *CounterPtrArray =
+            Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+                                               i * ComplexEdgePreds.size());
+        Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+                            EdgeState, CounterPtrArray);
+        // clear the predecessor number
+        Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+      }
+    }
+  }
+
+  insertCounterWriteout(DIF, CountersByIdent);
+
+  return true;
+}
+
+// All edges with successors that aren't branches are "complex", because it
+// requires complex logic to pick which counter to update.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+    Function *F,
+    GlobalVariable *Counters,
+    const UniqueVector<BasicBlock *> &Preds,
+    const UniqueVector<BasicBlock *> &Succs) {
+  // TODO: support invoke, threads. We rely on the fact that nothing can modify
+  // the whole-Module pred edge# between the time we set it and the time we next
+  // read it. Threads and invoke make this untrue.
+
+  // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+  const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+  const ArrayType *EdgeTableTy = ArrayType::get(
+      Int64PtrTy, Succs.size() * Preds.size());
+
+  Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
+  Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+  for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i)
+    EdgeTable[i] = NullValue;
+
+  unsigned Edge = 0;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    TerminatorInst *TI = BB->getTerminator();
+    int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+    if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+      for (int i = 0; i != Successors; ++i) {
+        BasicBlock *Succ = TI->getSuccessor(i);
+        IRBuilder<> builder(Succ);
+        Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+                                                            Edge + i);
+        EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+                  (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+      }
+    }
+    Edge += Successors;
+  }
+
+  GlobalVariable *EdgeTableGV =
+      new GlobalVariable(
+          *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+          ConstantArray::get(EdgeTableTy,
+                             &EdgeTable[0], Succs.size() * Preds.size()),
+          "__llvm_gcda_edge_table");
+  EdgeTableGV->setUnnamedAddr(true);
+  return EdgeTableGV;
+}
+
+Constant *GCOVProfiler::getStartFileFunc() {
+  const Type *Args[] = { Type::getInt8PtrTy(*Ctx) };
+  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                              Args, false);
+  return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
+}
+
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+  const Type *Args[] = {
+    Type::getInt32PtrTy(*Ctx),                  // uint32_t *predecessor
+    Type::getInt64PtrTy(*Ctx)->getPointerTo(),  // uint64_t **state_table_row
+  };
+  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                              Args, false);
+  return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy);
+}
+
+Constant *GCOVProfiler::getEmitFunctionFunc() {
+  const Type *Args[] = { Type::getInt32Ty(*Ctx) };
+  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                              Args, false);
+  return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+Constant *GCOVProfiler::getEmitArcsFunc() {
+  const Type *Args[] = {
+    Type::getInt32Ty(*Ctx),     // uint32_t num_counters
+    Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
+  };
+  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                              Args, false);
+  return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
+}
+
+Constant *GCOVProfiler::getEndFileFunc() {
+  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
+  GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
+  if (!GV) {
+    GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
+                            GlobalValue::InternalLinkage,
+                            ConstantInt::get(Type::getInt32Ty(*Ctx),
+                                             0xffffffff),
+                            "__llvm_gcov_global_state_pred");
+    GV->setUnnamedAddr(true);
+  }
+  return GV;
+}
+
+void GCOVProfiler::insertCounterWriteout(
+    DebugInfoFinder &DIF,
+    SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &CountersByIdent) {
+  const FunctionType *WriteoutFTy =
+      FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *WriteoutF = Function::Create(WriteoutFTy,
+                                         GlobalValue::InternalLinkage,
+                                         "__llvm_gcov_writeout", M);
+  WriteoutF->setUnnamedAddr(true);
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF);
+  IRBuilder<> Builder(BB);
+
+  Constant *StartFile = getStartFileFunc();
+  Constant *EmitFunction = getEmitFunctionFunc();
+  Constant *EmitArcs = getEmitArcsFunc();
+  Constant *EndFile = getEndFileFunc();
+
+  for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
+           CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
+    DICompileUnit compile_unit(*CUI);
+    std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda");
+    Builder.CreateCall(StartFile,
+                       Builder.CreateGlobalStringPtr(FilenameGcda));
+    for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator
+             I = CountersByIdent.begin(), E = CountersByIdent.end();
+         I != E; ++I) {
+      Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx),
+                                                        I->second));
+      GlobalVariable *GV = I->first;
+      unsigned Arcs =
+          cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
+      Builder.CreateCall2(EmitArcs,
+                          ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+                          Builder.CreateConstGEP2_64(GV, 0, 0));
+    }
+    Builder.CreateCall(EndFile);
+  }
+  Builder.CreateRetVoid();
+
+  InsertProfilingShutdownCall(WriteoutF, M);
+}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 96ed4fa..71adc1e 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -23,6 +23,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeEdgeProfilerPass(Registry);
   initializeOptimalEdgeProfilerPass(Registry);
   initializePathProfilerPass(Registry);
+  initializeGCOVProfilerPass(Registry);
 }
 
 /// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 829da6b..f76c77e 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This module privides means for calculating a maximum spanning tree for a
+// This module provides means for calculating a maximum spanning tree for a
 // given set of weighted edges. The type parameter T is the type of a node.
 //
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index c85a1a9..e09f882 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "insert-optimal-edge-profiling"
 #include "ProfilingUtils.h"
+#include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Passes.h"
@@ -26,7 +27,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "MaximumSpanningTree.h"
-#include <set>
 using namespace llvm;
 
 STATISTIC(NumEdgesInserted, "The # of edges inserted.");
@@ -120,14 +120,14 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
   NumEdgesInserted = 0;
 
   std::vector<Constant*> Initializer(NumEdges);
-  Constant* Zero = ConstantInt::get(Int32, 0);
-  Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+  Constant *Zero = ConstantInt::get(Int32, 0);
+  Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
 
   // Instrument all of the edges not in MST...
   unsigned i = 0;
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
     if (F->isDeclaration()) continue;
-    DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+    DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
 
     // Calculate a Maximum Spanning Tree with the edge weights determined by
     // ProfileEstimator. ProfileEstimator also assign weights to the virtual
@@ -139,17 +139,17 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
     ProfileInfo::EdgeWeights ECs =
       getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
     std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
-    MaximumSpanningTree<BasicBlock> MST (EdgeVector);
-    std::stable_sort(MST.begin(),MST.end());
+    MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+    std::stable_sort(MST.begin(), MST.end());
 
     // Check if (0,entry) not in the MST. If not, instrument edge
     // (IncrementCounterInBlock()) and set the counter initially to zero, if
     // the edge is in the MST the counter is initialised to -1.
 
     BasicBlock *entry = &(F->getEntryBlock());
-    ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+    ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
     if (!std::binary_search(MST.begin(), MST.end(), edge)) {
-      printEdgeCounter(edge,entry,i);
+      printEdgeCounter(edge, entry, i);
       IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
       Initializer[i++] = (Zero);
     } else{
@@ -170,9 +170,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
       // has no successors, the virtual edge (BB,0) is processed.
       TerminatorInst *TI = BB->getTerminator();
       if (TI->getNumSuccessors() == 0) {
-        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
         if (!std::binary_search(MST.begin(), MST.end(), edge)) {
-          printEdgeCounter(edge,BB,i);
+          printEdgeCounter(edge, BB, i);
           IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
           Initializer[i++] = (Zero);
         } else{
@@ -195,11 +195,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
           // otherwise insert it in the successor block.
           if (TI->getNumSuccessors() == 1) {
             // Insert counter at the start of the block
-            printEdgeCounter(edge,BB,i);
+            printEdgeCounter(edge, BB, i);
             IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
           } else {
             // Insert counter at the start of the block
-            printEdgeCounter(edge,Succ,i);
+            printEdgeCounter(edge, Succ, i);
             IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
           }
           Initializer[i++] = (Zero);
@@ -212,9 +212,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
 
   // Check if the number of edges counted at first was the number of edges we
   // considered for instrumentation.
-  assert(i==NumEdges && "the number of edges in counting array is wrong");
+  assert(i == NumEdges && "the number of edges in counting array is wrong");
 
-  // Assing the now completely defined initialiser to the array.
+  // Assign the now completely defined initialiser to the array.
   Constant *init = ConstantArray::get(ATy, Initializer);
   Counters->setInitializer(init);
 
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 6449b39..6b3f12d 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -63,7 +63,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
-#include <map>
 #include <vector>
 
 #define HASH_THRESHHOLD 100000
@@ -259,7 +258,7 @@ private:
 };
 
 // ---------------------------------------------------------------------------
-// PathProfiler is a module pass which intruments path profiling instructions
+// PathProfiler is a module pass which instruments path profiling instructions
 // ---------------------------------------------------------------------------
 class PathProfiler : public ModulePass {
 private:
@@ -389,6 +388,9 @@ namespace llvm {
 
   // BallLarusEdge << operator overloading
   raw_ostream& operator<<(raw_ostream& os,
+                          const BLInstrumentationEdge& edge)
+      LLVM_ATTRIBUTE_USED;
+  raw_ostream& operator<<(raw_ostream& os,
                           const BLInstrumentationEdge& edge) {
     os << "[" << edge.getSource()->getName() << " -> "
        << edge.getTarget()->getName() << "] init: "
@@ -929,14 +931,16 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
 void PathProfiler::preparePHI(BLInstrumentationNode* node) {
   BasicBlock* block = node->getBlock();
   BasicBlock::iterator insertPoint = block->getFirstNonPHI();
-  PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+  pred_iterator PB = pred_begin(node->getBlock()),
+          PE = pred_end(node->getBlock());
+  PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
+                                 std::distance(PB, PE), "pathNumber",
                                  insertPoint );
   node->setPathPHI(phi);
   node->setStartingPathNumber(phi);
   node->setEndingPathNumber(phi);
 
-  for(pred_iterator predIt = pred_begin(node->getBlock()),
-        end = pred_end(node->getBlock()); predIt != end; predIt++) {
+  for(pred_iterator predIt = PB; predIt != PE; predIt++) {
     BasicBlock* pred = (*predIt);
 
     if(pred != NULL)
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index b57bbf6..7435bc3 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -110,7 +110,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
                                    GlobalValue *CounterArray, bool beginning) {
   // Insert the increment after any alloca or PHI instructions...
   BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
-                BB->getTerminator();
+                                   BB->getTerminator();
   while (isa<AllocaInst>(InsertPos))
     ++InsertPos;
 
@@ -121,8 +121,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
   Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
   Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
   Constant *ElementPtr =
-    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
-                                          Indices.size());
+    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
 
   // Load, increment and store the value back.
   Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
@@ -131,3 +130,41 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
                                          "NewFuncCounter", InsertPos);
   new StoreInst(NewVal, ElementPtr, InsertPos);
 }
+
+void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
+  // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
+  // types.
+  const Type *GlobalDtorElems[2] = {
+    Type::getInt32Ty(Mod->getContext()),
+    FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
+  };
+  const StructType *GlobalDtorElemTy =
+      StructType::get(Mod->getContext(), GlobalDtorElems, false);
+
+  // Construct the new element we'll be adding.
+  Constant *Elem[2] = {
+    ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
+    ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
+  };
+
+  // If llvm.global_dtors exists, make a copy of the things in its list and
+  // delete it, to replace it with one that has a larger array type.
+  std::vector<Constant *> dtors;
+  if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
+    if (ConstantArray *InitList =
+        dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
+      for (unsigned i = 0, e = InitList->getType()->getNumElements();
+           i != e; ++i)
+        dtors.push_back(cast<Constant>(InitList->getOperand(i)));
+    }
+    GlobalDtors->eraseFromParent();
+  }
+
+  // Build up llvm.global_dtors with our new item in it.
+  GlobalVariable *GlobalDtors = new GlobalVariable(
+      *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
+      GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
+  dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+  GlobalDtors->setInitializer(ConstantArray::get(
+      cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
+}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h
index a76e357..09b2217 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -18,9 +18,10 @@
 #define PROFILINGUTILS_H
 
 namespace llvm {
+  class BasicBlock;
   class Function;
   class GlobalValue;
-  class BasicBlock;
+  class Module;
   class PointerType;
 
   void InsertProfilingInitCall(Function *MainFn, const char *FnName,
@@ -29,6 +30,7 @@ namespace llvm {
   void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
                                GlobalValue *CounterArray,
                                bool beginning = true);
+  void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
 }
 
 #endif
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 106fb8f..fcf914f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -7,7 +7,6 @@ add_llvm_library(LLVMScalarOpts
   DCE.cpp
   DeadStoreElimination.cpp
   EarlyCSE.cpp
-  GEPSplitter.cpp
   GVN.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
@@ -27,7 +26,6 @@ add_llvm_library(LLVMScalarOpts
   Scalar.cpp
   ScalarReplAggregates.cpp
   SimplifyCFGPass.cpp
-  SimplifyHalfPowrLibCalls.cpp
   SimplifyLibCalls.cpp
   Sink.cpp
   TailDuplication.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9536939..0184390 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -47,21 +47,21 @@ using namespace llvm;
 using namespace llvm::PatternMatch;
 
 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                       "sunken Cmps");
 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                        "of sunken Casts");
 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                           "computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
 
-static cl::opt<bool>
-CriticalEdgeSplit("cgp-critical-edge-splitting",
-                  cl::desc("Split critical edges during codegen prepare"),
-                  cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableBranchOpts(
+  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+  cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
 namespace {
   class CodeGenPrepare : public FunctionPass {
@@ -76,15 +76,15 @@ namespace {
     /// update it.
     BasicBlock::iterator CurInstIterator;
 
-    /// BackEdges - Keep a set of all the loop back edges.
-    ///
-    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
-
-    // Keeps track of non-local addresses that have been sunk into a block. This
-    // allows us to avoid inserting duplicate code for blocks with multiple
-    // load/stores of the same address.
+    /// Keeps track of non-local addresses that have been sunk into a block.
+    /// This allows us to avoid inserting duplicate code for blocks with
+    /// multiple load/stores of the same address.
     DenseMap<Value*, Value*> SunkAddrs;
 
+    /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
+    /// be updated.
+    bool ModifiedDT;
+
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -98,10 +98,6 @@ namespace {
       AU.addPreserved<ProfileInfo>();
     }
 
-    virtual void releaseMemory() {
-      BackEdges.clear();
-    }
-
   private:
     bool EliminateMostlyEmptyBlocks(Function &F);
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -113,7 +109,7 @@ namespace {
     bool OptimizeCallInst(CallInst *CI);
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
-    void findLoopBackEdges(const Function &F);
+    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
   };
 }
 
@@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
   return new CodeGenPrepare(TLI);
 }
 
-/// findLoopBackEdges - Do a DFS walk to find loop back edges.
-///
-void CodeGenPrepare::findLoopBackEdges(const Function &F) {
-  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
-  FindFunctionBackedges(F, Edges);
-  
-  BackEdges.insert(Edges.begin(), Edges.end());
-}
-
-
 bool CodeGenPrepare::runOnFunction(Function &F) {
   bool EverMadeChange = false;
 
+  ModifiedDT = false;
   DT = getAnalysisIfAvailable<DominatorTree>();
   PFI = getAnalysisIfAvailable<ProfileInfo>();
+
   // First pass, eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
 
-  // Now find loop back edges, but only if they are being used to decide which
-  // critical edges to split.
-  if (CriticalEdgeSplit)
-    findLoopBackEdges(F);
-
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+      BasicBlock *BB = I++;
       MadeChange |= OptimizeBlock(*BB);
+    }
     EverMadeChange |= MadeChange;
   }
 
   SunkAddrs.clear();
 
+  if (!DisableBranchOpts) {
+    MadeChange = false;
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      MadeChange |= ConstantFoldTerminator(BB);
+
+    if (MadeChange)
+      ModifiedDT = true;
+    EverMadeChange |= MadeChange;
+  }
+
+  if (ModifiedDT && DT)
+    DT->DT->recalculate(F);
+
   return EverMadeChange;
 }
 
@@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   // The PHIs are now updated, change everything that refers to BB to use
   // DestBB and remove BB.
   BB->replaceAllUsesWith(DestBB);
-  if (DT) {
+  if (DT && !ModifiedDT) {
     BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
     BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
     BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
@@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
 
-/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
-/// lives in to see if there is a block that we can reuse as a critical edge
-/// from TIBB.
-static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
-  BasicBlock *Dest = DestPHI->getParent();
-  
-  /// TIPHIValues - This array is lazily computed to determine the values of
-  /// PHIs in Dest that TI would provide.
-  SmallVector<Value*, 32> TIPHIValues;
-  
-  /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
-  unsigned TIBBEntryNo = 0;
-  
-  // Check to see if Dest has any blocks that can be used as a split edge for
-  // this terminator.
-  for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
-    BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
-    // To be usable, the pred has to end with an uncond branch to the dest.
-    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
-    if (!PredBr || !PredBr->isUnconditional())
-      continue;
-    // Must be empty other than the branch and debug info.
-    BasicBlock::iterator I = Pred->begin();
-    while (isa<DbgInfoIntrinsic>(I))
-      I++;
-    if (&*I != PredBr)
-      continue;
-    // Cannot be the entry block; its label does not get emitted.
-    if (Pred == &Dest->getParent()->getEntryBlock())
-      continue;
-    
-    // Finally, since we know that Dest has phi nodes in it, we have to make
-    // sure that jumping to Pred will have the same effect as going to Dest in
-    // terms of PHI values.
-    PHINode *PN;
-    unsigned PHINo = 0;
-    unsigned PredEntryNo = pi;
-    
-    bool FoundMatch = true;
-    for (BasicBlock::iterator I = Dest->begin();
-         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
-      if (PHINo == TIPHIValues.size()) {
-        if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
-          TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
-        TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
-      }
-      
-      // If the PHI entry doesn't work, we can't use this pred.
-      if (PN->getIncomingBlock(PredEntryNo) != Pred)
-        PredEntryNo = PN->getBasicBlockIndex(Pred);
-      
-      if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
-        FoundMatch = false;
-        break;
-      }
-    }
-    
-    // If we found a workable predecessor, change TI to branch to Succ.
-    if (FoundMatch)
-      return Pred;
-  }
-  return 0;  
-}
-
-
-/// SplitEdgeNicely - Split the critical edge from TI to its specified
-/// successor if it will improve codegen.  We only do this if the successor has
-/// phi nodes (otherwise critical edges are ok).  If there is already another
-/// predecessor of the succ that is empty (and thus has no phi nodes), use it
-/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
-                     SmallSet<std::pair<const BasicBlock*,
-                                        const BasicBlock*>, 8> &BackEdges,
-                             Pass *P) {
-  BasicBlock *TIBB = TI->getParent();
-  BasicBlock *Dest = TI->getSuccessor(SuccNum);
-  assert(isa<PHINode>(Dest->begin()) &&
-         "This should only be called if Dest has a PHI!");
-  PHINode *DestPHI = cast<PHINode>(Dest->begin());
-
-  // Do not split edges to EH landing pads.
-  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
-    if (Invoke->getSuccessor(1) == Dest)
-      return;
-
-  // As a hack, never split backedges of loops.  Even though the copy for any
-  // PHIs inserted on the backedge would be dead for exits from the loop, we
-  // assume that the cost of *splitting* the backedge would be too high.
-  if (BackEdges.count(std::make_pair(TIBB, Dest)))
-    return;
-
-  if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
-    ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
-    if (PFI)
-      PFI->splitEdge(TIBB, Dest, ReuseBB);
-    Dest->removePredecessor(TIBB);
-    TI->setSuccessor(SuccNum, ReuseBB);
-    return;
-  }
-
-  SplitCriticalEdge(TI, SuccNum, P, true);
-}
-
-
 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
 /// sink it into user blocks to reduce the number of virtual
@@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
     // happens.
     WeakVH IterHandle(CurInstIterator);
     
-    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+                              ModifiedDT ? 0 : DT);
 
     // If the iterator instruction was recursively deleted, start over at the
     // start of the block.
@@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   return Simplifier.fold(CI, TD);
 }
 
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   br label %return
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   br label %return
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   br label %return
+/// return:
+///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+///   ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   ret i32 %tmp0
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   ret i32 %tmp1
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+  if (!TLI)
+    return false;
+
+  Value *V = RI->getReturnValue();
+  PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
+  if (V && !PN)
+    return false;
+
+  BasicBlock *BB = RI->getParent();
+  if (PN && PN->getParent() != BB)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  // See llvm::isInTailCallPosition().
+  const Function *F = BB->getParent();
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Make sure there are no instructions between the PHI and return, or that the
+  // return is the first instruction in the block.
+  if (PN) {
+    BasicBlock::iterator BI = BB->begin();
+    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+    if (&*BI != RI)
+      return false;
+  } else {
+    BasicBlock::iterator BI = BB->begin();
+    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+    if (&*BI != RI)
+      return false;
+  }
+
+  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+  /// call.
+  SmallVector<CallInst*, 4> TailCalls;
+  if (PN) {
+    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+      // Make sure the phi value is indeed produced by the tail call.
+      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+          TLI->mayBeEmittedAsTailCall(CI))
+        TailCalls.push_back(CI);
+    }
+  } else {
+    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+      if (!VisitedBBs.insert(*PI))
+        continue;
+
+      BasicBlock::InstListType &InstList = (*PI)->getInstList();
+      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+      if (RI == RE)
+        continue;
+
+      CallInst *CI = dyn_cast<CallInst>(&*RI);
+      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+        TailCalls.push_back(CI);
+    }
+  }
+
+  bool Changed = false;
+  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+    CallInst *CI = TailCalls[i];
+    CallSite CS(CI);
+
+    // Conservatively require the attributes of the call to match those of the
+    // return. Ignore noalias because it doesn't affect the call sequence.
+    unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+      continue;
+
+    // Make sure the call instruction is followed by an unconditional branch to
+    // the return block.
+    BasicBlock *CallBB = CI->getParent();
+    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+      continue;
+
+    // Duplicate the return into CallBB.
+    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+    ModifiedDT = Changed = true;
+    ++NumRetsDup;
+  }
+
+  // If we eliminated all predecessors of the block, delete the block now.
+  if (Changed && pred_begin(BB) == pred_end(BB))
+    BB->eraseFromParent();
+
+  return Changed;
+}
+
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
@@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // the addressing mode obtained from the non-PHI roots of the graph
   // are equivalent.
   Value *Consensus = 0;
-  unsigned NumUses = 0;
+  unsigned NumUsesConsensus = 0;
+  bool IsNumUsesConsensusValid = false;
   SmallVector<Instruction*, 16> AddrModeInsts;
   ExtAddrMode AddrMode;
   while (!worklist.empty()) {
@@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     ExtAddrMode NewAddrMode =
       AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
                                    NewAddrModeInsts, *TLI);
-    
-    // Ensure that the obtained addressing mode is equivalent to that obtained
-    // for all other roots of the PHI traversal.  Also, when choosing one
-    // such root as representative, select the one with the most uses in order
-    // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
-    if (!Consensus || NewAddrMode == AddrMode) {
-      if (V->getNumUses() > NumUses) {
+
+    // This check is broken into two cases with very similar code to avoid using
+    // getNumUses() as much as possible. Some values have a lot of uses, so
+    // calling getNumUses() unconditionally caused a significant compile-time
+    // regression.
+    if (!Consensus) {
+      Consensus = V;
+      AddrMode = NewAddrMode;
+      AddrModeInsts = NewAddrModeInsts;
+      continue;
+    } else if (NewAddrMode == AddrMode) {
+      if (!IsNumUsesConsensusValid) {
+        NumUsesConsensus = Consensus->getNumUses();
+        IsNumUsesConsensusValid = true;
+      }
+
+      // Ensure that the obtained addressing mode is equivalent to that obtained
+      // for all other roots of the PHI traversal.  Also, when choosing one
+      // such root as representative, select the one with the most uses in order
+      // to keep the cost modeling heuristics in AddressingModeMatcher
+      // applicable.
+      unsigned NumUses = V->getNumUses();
+      if (NumUses > NumUsesConsensus) {
         Consensus = V;
-        NumUses = V->getNumUses();
-        AddrMode = NewAddrMode;
+        NumUsesConsensus = NumUses;
         AddrModeInsts = NewAddrModeInsts;
       }
       continue;
@@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
 
+  // If we have no uses, recursively delete the value and all dead instructions
+  // using it.
   if (Repl->use_empty()) {
+    // This can cause recursive deletion, which can invalidate our iterator.
+    // Use a WeakVH to hold onto it in case this happens.
+    WeakVH IterHandle(CurInstIterator);
+    BasicBlock *BB = CurInstIterator->getParent();
+    
     RecursivelyDeleteTriviallyDeadInstructions(Repl);
-    // This address is now available for reassignment, so erase the table entry;
-    // we don't want to match some completely different instruction.
-    SunkAddrs[Addr] = 0;
+
+    if (IterHandle != CurInstIterator) {
+      // If the iterator instruction was recursively deleted, start over at the
+      // start of the block.
+      CurInstIterator = BB->begin();
+      SunkAddrs.clear();
+    } else {
+      // This address is now available for reassignment, so erase the table
+      // entry; we don't want to match some completely different instruction.
+      SunkAddrs[Addr] = 0;
+    }    
   }
   ++NumMemoryInsts;
   return true;
@@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (CallInst *CI = dyn_cast<CallInst>(I))
     return OptimizeCallInst(CI);
 
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+    return DupRetToEnableTailCallOpts(RI);
+
   return false;
 }
 
@@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
-  bool MadeChange = false;
-
-  // Split all critical edges where the dest block has a PHI.
-  if (CriticalEdgeSplit) {
-    TerminatorInst *BBTI = BB.getTerminator();
-    if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
-      for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
-        BasicBlock *SuccBB = BBTI->getSuccessor(i);
-        if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
-          SplitEdgeNicely(BBTI, i, BackEdges, this);
-      }
-    }
-  }
-
   SunkAddrs.clear();
+  bool MadeChange = false;
 
   CurInstIterator = BB.begin();
   for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index be12973..e275268 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
 
 #define DEBUG_TYPE "correlated-value-propagation"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index dbb68f3..8dbcc23 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -23,7 +23,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/Statistic.h"
-#include <set>
 using namespace llvm;
 
 STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 867a06a..53e4640 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
   // Okay, we have stores to two completely different pointers.  Try to
   // decompose the pointer into a "base + constant_offset" form.  If the base
   // pointers are equal, then we can reason about the two stores.
-  int64_t Off1 = 0, Off2 = 0;
-  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
-  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+  int64_t EarlierOff = 0, LaterOff = 0;
+  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
   
   // If the base pointers still differ, we have two completely different stores.
   if (BP1 != BP2)
     return false;
-  
-  // Otherwise, we might have a situation like:
-  //  store i16 -> P + 1 Byte
-  //  store i32 -> P
-  // In this case, we see if the later store completely overlaps all bytes
-  // stored by the previous store.
-  if (Off1 < Off2 ||                       // Earlier starts before Later.
-      Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
-    return false;
-  // Otherwise, we have complete overlap.
-  return true;
+
+  // The later store completely overlaps the earlier store if:
+  // 
+  // 1. Both start at the same offset and the later one's size is greater than
+  //    or equal to the earlier one's, or
+  //
+  //      |--earlier--|
+  //      |--   later   --|
+  //      
+  // 2. The earlier store has an offset greater than the later offset, but which
+  //    still lies completely within the later store.
+  //
+  //        |--earlier--|
+  //    |-----  later  ------|
+  //
+  // We have to be careful here as *Off is signed while *.Size is unsigned.
+  if (EarlierOff >= LaterOff &&
+      uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+    return true;
+
+  // Otherwise, they don't completely overlap.
+  return false;
 }
 
 /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
@@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       // away the store and we bail out.  However, if we depend on on something
       // that overwrites the memory location we *can* potentially optimize it.
       //
-      // Find out what memory location the dependant instruction stores.
+      // Find out what memory location the dependent instruction stores.
       Instruction *DepWrite = InstDep.getInst();
       AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
       // If we didn't get a useful location, or if it isn't a size, bail out.
@@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
       if (AA->doesNotAccessMemory(CS))
         continue;
       
-      unsigned NumModRef = 0, NumOther = 0;
-      
       // If the call might load from any of our allocas, then any store above
       // the call is live.
       SmallVector<Value*, 8> LiveAllocas;
       for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
            E = DeadStackObjects.end(); I != E; ++I) {
-        // If we detect that our AA is imprecise, it's not worth it to scan the
-        // rest of the DeadPointers set.  Just assume that the AA will return
-        // ModRef for everything, and go ahead and bail out.
-        if (NumModRef >= 16 && NumOther == 0)
-          return MadeChange;
-
         // See if the call site touches it.
         AliasAnalysis::ModRefResult A = 
           AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
         
-        if (A == AliasAnalysis::ModRef)
-          ++NumModRef;
-        else
-          ++NumOther;
-        
         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
           LiveAllocas.push_back(*I);
       }
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
deleted file mode 100644
index 4c3d188..0000000
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This function breaks GEPs with more than 2 non-zero operands into smaller
-// GEPs each with no more than 2 non-zero operands. This exposes redundancy
-// between GEPs with common initial operand sequences.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "split-geps"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
-  class GEPSplitter : public FunctionPass {
-    virtual bool runOnFunction(Function &F);
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    explicit GEPSplitter() : FunctionPass(ID) {
-      initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
-    }
-  };
-}
-
-char GEPSplitter::ID = 0;
-INITIALIZE_PASS(GEPSplitter, "split-geps",
-                "split complex GEPs into simple GEPs", false, false)
-
-FunctionPass *llvm::createGEPSplitterPass() {
-  return new GEPSplitter();
-}
-
-bool GEPSplitter::runOnFunction(Function &F) {
-  bool Changed = false;
-
-  // Visit each GEP instruction.
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-    for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
-        unsigned NumOps = GEP->getNumOperands();
-        // Ignore GEPs which are already simple.
-        if (NumOps <= 2)
-          continue;
-        bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
-                                cast<ConstantInt>(GEP->getOperand(1))->isZero();
-        if (NumOps == 3 && FirstIndexIsZero)
-          continue;
-        // The first index is special and gets expanded with a 2-operand GEP
-        // (unless it's zero, in which case we can skip this).
-        Value *NewGEP = FirstIndexIsZero ?
-          GEP->getOperand(0) :
-          GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
-                                    "tmp", GEP);
-        // All remaining indices get expanded with a 3-operand GEP with zero
-        // as the second operand.
-        Value *Idxs[2];
-        Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
-        for (unsigned i = 2; i != NumOps; ++i) {
-          Idxs[1] = GEP->getOperand(i);
-          NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
-        }
-        GEP->replaceAllUsesWith(NewGEP);
-        GEP->eraseFromParent();
-        Changed = true;
-      }
-
-  return Changed;
-}
-
-void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a0123f5..efecb97 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
 namespace {
   struct Expression {
     uint32_t opcode;
-    const Type* type;
+    const Type *type;
     SmallVector<uint32_t, 4> varargs;
 
-    Expression() { }
-    Expression(uint32_t o) : opcode(o) { }
+    Expression(uint32_t o = ~2U) : opcode(o) { }
 
     bool operator==(const Expression &other) const {
       if (opcode != other.opcode)
         return false;
-      else if (opcode == ~0U || opcode == ~1U)
+      if (opcode == ~0U || opcode == ~1U)
         return true;
-      else if (type != other.type)
+      if (type != other.type)
         return false;
-      else if (varargs != other.varargs)
+      if (varargs != other.varargs)
         return false;
       return true;
     }
   };
 
   class ValueTable {
-    private:
-      DenseMap<Value*, uint32_t> valueNumbering;
-      DenseMap<Expression, uint32_t> expressionNumbering;
-      AliasAnalysis* AA;
-      MemoryDependenceAnalysis* MD;
-      DominatorTree* DT;
-
-      uint32_t nextValueNumber;
-
-      Expression create_expression(Instruction* I);
-      uint32_t lookup_or_add_call(CallInst* C);
-    public:
-      ValueTable() : nextValueNumber(1) { }
-      uint32_t lookup_or_add(Value *V);
-      uint32_t lookup(Value *V) const;
-      void add(Value *V, uint32_t num);
-      void clear();
-      void erase(Value *v);
-      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
-      AliasAnalysis *getAliasAnalysis() const { return AA; }
-      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
-      void setDomTree(DominatorTree* D) { DT = D; }
-      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
-      void verifyRemoved(const Value *) const;
+    DenseMap<Value*, uint32_t> valueNumbering;
+    DenseMap<Expression, uint32_t> expressionNumbering;
+    AliasAnalysis *AA;
+    MemoryDependenceAnalysis *MD;
+    DominatorTree *DT;
+
+    uint32_t nextValueNumber;
+
+    Expression create_expression(Instruction* I);
+    uint32_t lookup_or_add_call(CallInst* C);
+  public:
+    ValueTable() : nextValueNumber(1) { }
+    uint32_t lookup_or_add(Value *V);
+    uint32_t lookup(Value *V) const;
+    void add(Value *V, uint32_t num);
+    void clear();
+    void erase(Value *v);
+    void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+    AliasAnalysis *getAliasAnalysis() const { return AA; }
+    void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+    void setDomTree(DominatorTree* D) { DT = D; }
+    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+    void verifyRemoved(const Value *) const;
   };
 }
 
@@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const {
   return VI->second;
 }
 
-/// clear - Remove all entries from the ValueTable
+/// clear - Remove all entries from the ValueTable.
 void ValueTable::clear() {
   valueNumbering.clear();
   expressionNumbering.clear();
   nextValueNumber = 1;
 }
 
-/// erase - Remove a value from the value numbering
+/// erase - Remove a value from the value numbering.
 void ValueTable::erase(Value *V) {
   valueNumbering.erase(V);
 }
@@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const {
 namespace {
 
   class GVN : public FunctionPass {
-    bool runOnFunction(Function &F);
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    explicit GVN(bool noloads = false)
-        : FunctionPass(ID), NoLoads(noloads), MD(0) {
-      initializeGVNPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
     bool NoLoads;
     MemoryDependenceAnalysis *MD;
     DominatorTree *DT;
-    const TargetData* TD;
-
+    const TargetData *TD;
+    
     ValueTable VN;
     
     /// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -418,17 +407,39 @@ namespace {
     DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
     BumpPtrAllocator TableAllocator;
     
+    SmallVector<Instruction*, 8> InstrsToErase;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit GVN(bool noloads = false)
+        : FunctionPass(ID), NoLoads(noloads), MD(0) {
+      initializeGVNPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+    
+    /// markInstructionForDeletion - This removes the specified instruction from
+    /// our various maps and marks it for deletion.
+    void markInstructionForDeletion(Instruction *I) {
+      VN.erase(I);
+      InstrsToErase.push_back(I);
+    }
+    
+    const TargetData *getTargetData() const { return TD; }
+    DominatorTree &getDominatorTree() const { return *DT; }
+    AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+    MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+  private:
     /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
     /// its value number.
     void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
-      LeaderTableEntry& Curr = LeaderTable[N];
+      LeaderTableEntry &Curr = LeaderTable[N];
       if (!Curr.Val) {
         Curr.Val = V;
         Curr.BB = BB;
         return;
       }
       
-      LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+      LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
       Node->Val = V;
       Node->BB = BB;
       Node->Next = Curr.Next;
@@ -474,19 +485,17 @@ namespace {
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<AliasAnalysis>();
     }
+    
 
     // Helper fuctions
     // FIXME: eliminate or document these better
-    bool processLoad(LoadInst* L,
-                     SmallVectorImpl<Instruction*> &toErase);
-    bool processInstruction(Instruction *I,
-                            SmallVectorImpl<Instruction*> &toErase);
-    bool processNonLocalLoad(LoadInst* L,
-                             SmallVectorImpl<Instruction*> &toErase);
+    bool processLoad(LoadInst *L);
+    bool processInstruction(Instruction *I);
+    bool processNonLocalLoad(LoadInst *L);
     bool processBlock(BasicBlock *BB);
-    void dump(DenseMap<uint32_t, Value*>& d);
+    void dump(DenseMap<uint32_t, Value*> &d);
     bool iterateOnFunction(Function &F);
-    bool performPRE(Function& F);
+    bool performPRE(Function &F);
     Value *findLeader(BasicBlock *BB, uint32_t num);
     void cleanupGlobalSets();
     void verifyRemoved(const Instruction *I) const;
@@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
   if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
     return 0;
   
+  // If this is already the right type, just return it.
   const Type *StoredValTy = StoredVal->getType();
   
   uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
-  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+  uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
   
   // If the store and reload are the same size, we can always reuse it.
   if (StoreSize == LoadSize) {
-    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
-      // Pointer to Pointer -> use bitcast.
+    // Pointer to Pointer -> use bitcast.
+    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
       return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
-    }
     
     // Convert source pointers to integers, which can be bitcast.
     if (StoredValTy->isPointerTy()) {
@@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                         StorePtr, StoreSize, TD);
 }
 
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load.  See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+                                         LoadInst *DepLI, const TargetData &TD){
+  // Cannot handle reading from store of first-class aggregate yet.
+  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+    return -1;
+  
+  Value *DepPtr = DepLI->getPointerOperand();
+  uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+  if (R != -1) return R;
+  
+  // If we have a load/load clobber an DepLI can be widened to cover this load,
+  // then we should widen it!
+  int64_t LoadOffs = 0;
+  const Value *LoadBase =
+    GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+  
+  unsigned Size = MemoryDependenceAnalysis::
+    getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+  if (Size == 0) return -1;
+  
+  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
 static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                             MemIntrinsic *MI,
                                             const TargetData &TD) {
@@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
 
 /// GetStoreValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering store.  This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias.  Check this case to see if there is
-/// anything more we can do before we give up.
+/// that the store provides bits used by the load but we the pointers don't
+/// mustalias.  Check this case to see if there is anything more we can do
+/// before we give up.
 static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                    const Type *LoadTy,
                                    Instruction *InsertPt, const TargetData &TD){
@@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
   return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
 }
 
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load.  This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias.  Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+                                  const Type *LoadTy, Instruction *InsertPt,
+                                  GVN &gvn) {
+  const TargetData &TD = *gvn.getTargetData();
+  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+  // widen SrcVal out to a larger load.
+  unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+  if (Offset+LoadSize > SrcValSize) {
+    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+    assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    // If we have a load/load clobber an DepLI can be widened to cover this
+    // load, then we should widen it to the next power of 2 size big enough!
+    unsigned NewLoadSize = Offset+LoadSize;
+    if (!isPowerOf2_32(NewLoadSize))
+      NewLoadSize = NextPowerOf2(NewLoadSize);
+
+    Value *PtrVal = SrcVal->getPointerOperand();
+    
+    // Insert the new load after the old load.  This ensures that subsequent
+    // memdep queries will find the new load.  We can't easily remove the old
+    // load completely because it is already in the value numbering table.
+    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+    const Type *DestPTy = 
+      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+    DestPTy = PointerType::get(DestPTy, 
+                       cast<PointerType>(PtrVal->getType())->getAddressSpace());
+    
+    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+    NewLoad->takeName(SrcVal);
+    NewLoad->setAlignment(SrcVal->getAlignment());
+    
+    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+    
+    // Replace uses of the original load with the wider load.  On a big endian
+    // system, we need to shift down to get the relevant bits.
+    Value *RV = NewLoad;
+    if (TD.isBigEndian())
+      RV = Builder.CreateLShr(RV,
+                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+    RV = Builder.CreateTrunc(RV, SrcVal->getType());
+    SrcVal->replaceAllUsesWith(RV);
+    
+    // We would like to use gvn.markInstructionForDeletion here, but we can't
+    // because the load is already memoized into the leader map table that GVN
+    // tracks.  It is potentially possible to remove the load from the table,
+    // but then there all of the operations based on it would need to be
+    // rehashed.  Just leave the dead load around.
+    gvn.getMemDep().removeInstruction(SrcVal);
+    SrcVal = NewLoad;
+  }
+  
+  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
 /// GetMemInstValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering mem intrinsic.
 static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
@@ -943,11 +1045,12 @@ struct AvailableValueInBlock {
   BasicBlock *BB;
   enum ValType {
     SimpleVal,  // A simple offsetted value that is accessed.
+    LoadVal,    // A value produced by a load.
     MemIntrin   // A memory intrinsic which is loaded from.
   };
   
   /// V - The value that is live out of the block.
-  PointerIntPair<Value *, 1, ValType> Val;
+  PointerIntPair<Value *, 2, ValType> Val;
   
   /// Offset - The byte offset in Val that is interesting for the load query.
   unsigned Offset;
@@ -972,37 +1075,69 @@ struct AvailableValueInBlock {
     return Res;
   }
   
+  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+                                       unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(LI);
+    Res.Val.setInt(LoadVal);
+    Res.Offset = Offset;
+    return Res;
+  }
+
   bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
   Value *getSimpleValue() const {
     assert(isSimpleValue() && "Wrong accessor");
     return Val.getPointer();
   }
   
+  LoadInst *getCoercedLoadValue() const {
+    assert(isCoercedLoadValue() && "Wrong accessor");
+    return cast<LoadInst>(Val.getPointer());
+  }
+  
   MemIntrinsic *getMemIntrinValue() const {
-    assert(!isSimpleValue() && "Wrong accessor");
+    assert(isMemIntrinValue() && "Wrong accessor");
     return cast<MemIntrinsic>(Val.getPointer());
   }
   
   /// MaterializeAdjustedValue - Emit code into this block to adjust the value
   /// defined here to the specified type.  This handles various coercion cases.
-  Value *MaterializeAdjustedValue(const Type *LoadTy,
-                                  const TargetData *TD) const {
+  Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
     Value *Res;
     if (isSimpleValue()) {
       Res = getSimpleValue();
       if (Res->getType() != LoadTy) {
+        const TargetData *TD = gvn.getTargetData();
         assert(TD && "Need target data to handle type mismatch case");
         Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                    *TD);
         
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                      << *getSimpleValue() << '\n'
                      << *Res << '\n' << "\n\n\n");
       }
+    } else if (isCoercedLoadValue()) {
+      LoadInst *Load = getCoercedLoadValue();
+      if (Load->getType() == LoadTy && Offset == 0) {
+        Res = Load;
+      } else {
+        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+                                  gvn);
+        
+        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
+                     << *getCoercedLoadValue() << '\n'
+                     << *Res << '\n' << "\n\n\n");
+      }
     } else {
+      const TargetData *TD = gvn.getTargetData();
+      assert(TD && "Need target data to handle type mismatch case");
       Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                    LoadTy, BB->getTerminator(), *TD);
-      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                    << "  " << *getMemIntrinValue() << '\n'
                    << *Res << '\n' << "\n\n\n");
     }
@@ -1010,21 +1145,20 @@ struct AvailableValueInBlock {
   }
 };
 
-}
+} // end anonymous namespace
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
 /// construct SSA form, allowing us to eliminate LI.  This returns the value
 /// that should be used at LI's definition site.
 static Value *ConstructSSAForLoadSet(LoadInst *LI, 
                          SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
-                                     const TargetData *TD,
-                                     const DominatorTree &DT,
-                                     AliasAnalysis *AA) {
+                                     GVN &gvn) {
   // Check for the fully redundant, dominating load case.  In this case, we can
   // just use the dominating value directly.
   if (ValuesPerBlock.size() == 1 && 
-      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
-    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+                                               LI->getParent()))
+    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
 
   // Otherwise, we have to construct SSA form.
   SmallVector<PHINode*, 8> NewPHIs;
@@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
     if (SSAUpdate.HasValueForBlock(BB))
       continue;
 
-    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
   }
   
   // Perform PHI construction.
   Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
   
   // If new PHI nodes were created, notify alias analysis.
-  if (V->getType()->isPointerTy())
+  if (V->getType()->isPointerTy()) {
+    AliasAnalysis *AA = gvn.getAliasAnalysis();
+    
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
       AA->copyValue(LI, NewPHIs[i]);
     
@@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
       for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
         AA->addEscapingUse(P->getOperandUse(2*ii));
     }
+  }
 
   return V;
 }
@@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) {
 
 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI,
-                              SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processNonLocalLoad(LoadInst *LI) {
   // Find the non-local dependencies of the load.
   SmallVector<NonLocalDepResult, 64> Deps;
   AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
@@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
 
   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
+  if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
+      Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
     DEBUG(
       dbgs() << "GVN: non-local load ";
       WriteAsOperand(dbgs(), LI);
@@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
           }
         }
       }
+      
+      // Check to see if we have something like this:
+      //    load i32* P
+      //    load i8* (P+1)
+      // if we have this, replace the later with an extraction from the former.
+      if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+        // If this is a clobber and L is the first instruction in its block, then
+        // we have the first instruction in the entry block.
+        if (DepLI != LI && Address && TD) {
+          int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+                                                     LI->getPointerOperand(),
+                                                     DepLI, *TD);
+          
+          if (Offset != -1) {
+            ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+                                                                    Offset));
+            continue;
+          }
+        }
+      }
 
       // If the clobbering value is a memset/memcpy/memmove, see if we can
       // forward a value on from it.
@@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
           continue;
         }          
       }
-      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+      ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
       continue;
     }
     
@@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
     
     // Perform PHI construction.
-    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
-                                      VN.getAliasAnalysis());
+    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
     LI->replaceAllUsesWith(V);
 
     if (isa<PHINode>(V))
       V->takeName(LI);
     if (V->getType()->isPointerTy())
       MD->invalidateCachedPointerInfo(V);
-    VN.erase(LI);
-    toErase.push_back(LI);
+    markInstructionForDeletion(LI);
     ++NumGVNLoad;
     return true;
   }
@@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   }
 
   // Perform PHI construction.
-  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
-                                    VN.getAliasAnalysis());
+  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
   LI->replaceAllUsesWith(V);
   if (isa<PHINode>(V))
     V->takeName(LI);
   if (V->getType()->isPointerTy())
     MD->invalidateCachedPointerInfo(V);
-  VN.erase(LI);
-  toErase.push_back(LI);
+  markInstructionForDeletion(LI);
   ++NumPRELoad;
   return true;
 }
 
 /// processLoad - Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processLoad(LoadInst *L) {
   if (!MD)
     return false;
 
@@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   // ... to a pointer that has been loaded from before...
   MemDepResult Dep = MD->getDependency(L);
 
-  // If the value isn't available, don't do anything!
-  if (Dep.isClobber()) {
+  // If we have a clobber and target data is around, see if this is a clobber
+  // that we can fix up through code synthesis.
+  if (Dep.isClobber() && TD) {
     // Check to see if we have something like this:
     //   store i32 123, i32* %P
     //   %A = bitcast i32* %P to i8*
@@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // completely covers this load.  This sort of thing can happen in bitfield
     // access code.
     Value *AvailVal = 0;
-    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
-      if (TD) {
-        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
-                                                    L->getPointerOperand(),
-                                                    DepSI, *TD);
-        if (Offset != -1)
-          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
-                                          L->getType(), L, *TD);
-      }
+    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+      int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+                                                  L->getPointerOperand(),
+                                                  DepSI, *TD);
+      if (Offset != -1)
+        AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+                                        L->getType(), L, *TD);
+    }
+    
+    // Check to see if we have something like this:
+    //    load i32* P
+    //    load i8* (P+1)
+    // if we have this, replace the later with an extraction from the former.
+    if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+      // If this is a clobber and L is the first instruction in its block, then
+      // we have the first instruction in the entry block.
+      if (DepLI == L)
+        return false;
+      
+      int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+                                                 L->getPointerOperand(),
+                                                 DepLI, *TD);
+      if (Offset != -1)
+        AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+    }
     
     // If the clobbering value is a memset/memcpy/memmove, see if we can forward
     // a value on from it.
     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
-      if (TD) {
-        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
-                                                      L->getPointerOperand(),
-                                                      DepMI, *TD);
-        if (Offset != -1)
-          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
-      }
+      int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+                                                    L->getPointerOperand(),
+                                                    DepMI, *TD);
+      if (Offset != -1)
+        AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
     }
         
     if (AvailVal) {
@@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
       L->replaceAllUsesWith(AvailVal);
       if (AvailVal->getType()->isPointerTy())
         MD->invalidateCachedPointerInfo(AvailVal);
-      VN.erase(L);
-      toErase.push_back(L);
+      markInstructionForDeletion(L);
       ++NumGVNLoad;
       return true;
     }
-        
+  }
+  
+  // If the value isn't available, don't do anything!
+  if (Dep.isClobber()) {
     DEBUG(
-      // fast print dep, using operator<< on instruction would be too slow
+      // fast print dep, using operator<< on instruction is too slow.
       dbgs() << "GVN: load ";
       WriteAsOperand(dbgs(), L);
       Instruction *I = Dep.getInst();
@@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
 
   // If it is defined in another block, try harder.
   if (Dep.isNonLocal())
-    return processNonLocalLoad(L, toErase);
+    return processNonLocalLoad(L);
 
   Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
@@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(StoredVal);
     if (StoredVal->getType()->isPointerTy())
       MD->invalidateCachedPointerInfo(StoredVal);
-    VN.erase(L);
-    toErase.push_back(L);
+    markInstructionForDeletion(L);
     ++NumGVNLoad;
     return true;
   }
@@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // (depending on its type).
     if (DepLI->getType() != L->getType()) {
       if (TD) {
-        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+                                                      L, *TD);
         if (AvailableVal == 0)
           return false;
       
@@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(AvailableVal);
     if (DepLI->getType()->isPointerTy())
       MD->invalidateCachedPointerInfo(DepLI);
-    VN.erase(L);
-    toErase.push_back(L);
+    markInstructionForDeletion(L);
     ++NumGVNLoad;
     return true;
   }
@@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   // intervening stores, for example.
   if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
-    VN.erase(L);
-    toErase.push_back(L);
+    markInstructionForDeletion(L);
     ++NumGVNLoad;
     return true;
   }
   
   // If this load occurs either right after a lifetime begin,
   // then the loaded value is undefined.
-  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
       L->replaceAllUsesWith(UndefValue::get(L->getType()));
-      VN.erase(L);
-      toErase.push_back(L);
+      markInstructionForDeletion(L);
       ++NumGVNLoad;
       return true;
     }
@@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
 
 /// processInstruction - When calculating availability, handle an instruction
 /// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I,
-                             SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processInstruction(Instruction *I) {
   // Ignore dbg info intrinsics.
   if (isa<DbgInfoIntrinsic>(I))
     return false;
@@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I,
     I->replaceAllUsesWith(V);
     if (MD && V->getType()->isPointerTy())
       MD->invalidateCachedPointerInfo(V);
-    VN.erase(I);
-    toErase.push_back(I);
+    markInstructionForDeletion(I);
     return true;
   }
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    bool Changed = processLoad(LI, toErase);
-
-    if (!Changed) {
-      unsigned Num = VN.lookup_or_add(LI);
-      addToLeaderTable(Num, LI, LI->getParent());
-    }
+    if (processLoad(LI))
+      return true;
 
-    return Changed;
+    unsigned Num = VN.lookup_or_add(LI);
+    addToLeaderTable(Num, LI, LI->getParent());
+    return false;
   }
 
   // For conditions branches, we can perform simple conditional propagation on
@@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I,
   }
   
   // Remove it!
-  VN.erase(I);
   I->replaceAllUsesWith(repl);
   if (MD && repl->getType()->isPointerTy())
     MD->invalidateCachedPointerInfo(repl);
-  toErase.push_back(I);
+  markInstructionForDeletion(I);
   return true;
 }
 
@@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) {
 
 
 bool GVN::processBlock(BasicBlock *BB) {
-  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
-  // incrementing BI before processing an instruction).
-  SmallVector<Instruction*, 8> toErase;
+  // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
+  // (and incrementing BI before processing an instruction).
+  assert(InstrsToErase.empty() &&
+         "We expect InstrsToErase to be empty across iterations");
   bool ChangedFunction = false;
 
   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
        BI != BE;) {
-    ChangedFunction |= processInstruction(BI, toErase);
-    if (toErase.empty()) {
+    ChangedFunction |= processInstruction(BI);
+    if (InstrsToErase.empty()) {
       ++BI;
       continue;
     }
 
     // If we need some instructions deleted, do it now.
-    NumGVNInstr += toErase.size();
+    NumGVNInstr += InstrsToErase.size();
 
     // Avoid iterator invalidation.
     bool AtStart = BI == BB->begin();
     if (!AtStart)
       --BI;
 
-    for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
-         E = toErase.end(); I != E; ++I) {
+    for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+         E = InstrsToErase.end(); I != E; ++I) {
       DEBUG(dbgs() << "GVN removed: " << **I << '\n');
       if (MD) MD->removeInstruction(*I);
       (*I)->eraseFromParent();
       DEBUG(verifyRemoved(*I));
     }
-    toErase.clear();
+    InstrsToErase.clear();
 
     if (AtStart)
       BI = BB->begin();
@@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) {
       addToLeaderTable(ValNo, PREInstr, PREPred);
 
       // Create a PHI to make the value available in this block.
-      PHINode* Phi = PHINode::Create(CurInst->getType(),
+      pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
+      PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
                                      CurInst->getName() + ".pre-phi",
                                      CurrentBlock->begin());
-      for (pred_iterator PI = pred_begin(CurrentBlock),
-           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
         BasicBlock *P = *PI;
         Phi->addIncoming(predMap[P], P);
       }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0fb6798..09d569a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -73,6 +73,7 @@ namespace {
     LoopInfo        *LI;
     ScalarEvolution *SE;
     DominatorTree   *DT;
+    SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
@@ -98,6 +99,7 @@ namespace {
     }
 
   private:
+    bool isValidRewrite(Value *FromVal, Value *ToVal);
 
     void EliminateIVComparisons();
     void EliminateIVRemainders();
@@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() {
   return new IndVarSimplify();
 }
 
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+  // If an SCEV expression subsumed multiple pointers, its expansion could
+  // reassociate the GEP changing the base pointer. This is illegal because the
+  // final address produced by a GEP chain must be inbounds relative to its
+  // underlying object. Otherwise basic alias analysis, among other things,
+  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+  // producing an expression involving multiple pointers. Until then, we must
+  // bail out here.
+  //
+  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+  // because it understands lcssa phis while SCEV does not.
+  Value *FromPtr = FromVal;
+  Value *ToPtr = ToVal;
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+    FromPtr = GEP->getPointerOperand();
+  }
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+    ToPtr = GEP->getPointerOperand();
+  }
+  if (FromPtr != FromVal || ToPtr != ToVal) {
+    // Quickly check the common case
+    if (FromPtr == ToPtr)
+      return true;
+
+    // SCEV may have rewritten an expression that produces the GEP's pointer
+    // operand. That's ok as long as the pointer operand has the same base
+    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+    // base of a recurrence. This handles the case in which SCEV expansion
+    // converts a pointer type recurrence into a nonrecurrent pointer base
+    // indexed by an integer recurrence.
+    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+    if (FromBase == ToBase)
+      return true;
+
+    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+          << *FromBase << " != " << *ToBase << "\n");
+
+    return false;
+  }
+  return true;
+}
+
 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
 /// loop to be a canonical != comparison against the incremented loop induction
 /// variable.  This pass is able to rewrite the exit tests of any loop where the
@@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   // update the branch to use the new comparison; in the common case this
   // will make old comparison dead.
   BI->setCondition(Cond);
-  RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+  DeadInsts.push_back(OrigCond);
 
   ++NumLFTR;
   Changed = true;
@@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
         if (!SE->isLoopInvariant(ExitValue, L))
           continue;
 
-        Changed = true;
-        ++NumReplaced;
-
         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
         DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                      << "  LoopVal = " << *Inst << "\n");
 
+        if (!isValidRewrite(Inst, ExitVal)) {
+          DeadInsts.push_back(ExitVal);
+          continue;
+        }
+        Changed = true;
+        ++NumReplaced;
+
         PN->setIncomingValue(i, ExitVal);
 
         // If this instruction is dead now, delete it.
@@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
 }
 
 void IndVarSimplify::EliminateIVComparisons() {
-  SmallVector<WeakVH, 16> DeadInsts;
-
   // Look for ICmp users.
   for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
     IVStrideUse &UI = *I;
@@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() {
     DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
     DeadInsts.push_back(ICmp);
   }
-
-  // Now that we're done iterating through lists, clean up any instructions
-  // which are now dead.
-  while (!DeadInsts.empty())
-    if (Instruction *Inst =
-        dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
 void IndVarSimplify::EliminateIVRemainders() {
-  SmallVector<WeakVH, 16> DeadInsts;
-
   // Look for SRem and URem users.
   for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
     IVStrideUse &UI = *I;
@@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() {
     DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
     DeadInsts.push_back(Rem);
   }
-
-  // Now that we're done iterating through lists, clean up any instructions
-  // which are now dead.
-  while (!DeadInsts.empty())
-    if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
   DT = &getAnalysis<DominatorTree>();
+  DeadInsts.clear();
   Changed = false;
 
   // If there are any floating-point recurrences, attempt to
@@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                           ExitingBlock, BI, Rewriter);
   }
 
-  // Rewrite IV-derived expressions. Clears the rewriter cache.
+  // Rewrite IV-derived expressions.
   RewriteIVExpressions(L, Rewriter);
 
+  // Clear the rewriter cache, because values that are in the rewriter's cache
+  // can be deleted in the loop below, causing the AssertingVH in the cache to
+  // trigger.
+  Rewriter.clear();
+
+  // Now that we're done iterating through lists, clean up any instructions
+  // which are now dead.
+  while (!DeadInsts.empty())
+    if (Instruction *Inst =
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+      RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
   // The Rewriter may not be used from this point on.
 
   // Loop-invariant instructions in the preheader that aren't used in the
@@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
       if (!isSafe(*I, L, SE)) return false;
     return true;
   }
-  
+
   // A cast is safe if its operand is.
   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
     return isSafe(C->getOperand(), L, SE);
@@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
 }
 
 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
-  SmallVector<WeakVH, 16> DeadInsts;
-
   // Rewrite all induction variable expressions in terms of the canonical
   // induction variable.
   //
@@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
     // Now expand it into actual Instructions and patch it into place.
     Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
 
+    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+                 << "   into = " << *NewVal << "\n");
+
+    if (!isValidRewrite(Op, NewVal)) {
+      DeadInsts.push_back(NewVal);
+      continue;
+    }
     // Inform ScalarEvolution that this value is changing. The change doesn't
     // affect its value, but it does potentially affect which use lists the
     // value will be on after the replacement, which affects ScalarEvolution's
@@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
       NewVal->takeName(Op);
     User->replaceUsesOfWith(Op, NewVal);
     UI->setOperandValToReplace(NewVal);
-    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
-                 << "   into = " << *NewVal << "\n");
+
     ++NumRemoved;
     Changed = true;
 
     // The old value may be dead now.
     DeadInsts.push_back(Op);
   }
-
-  // Clear the rewriter cache, because values that are in the rewriter's cache
-  // can be deleted in the loop below, causing the AssertingVH in the cache to
-  // trigger.
-  Rewriter.clear();
-  // Now that we're done iterating through lists, clean up any instructions
-  // which are now dead.
-  while (!DeadInsts.empty())
-    if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
 /// If there's a single exit block, sink any loop-invariant values that
@@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   BinaryOperator *Incr =
     dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
   if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-  
+
   // If this is not an add of the PHI with a constantfp, or if the constant fp
   // is not an integer, bail out.
   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
@@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   if (Compare == 0 || !Compare->hasOneUse() ||
       !isa<BranchInst>(Compare->use_back()))
     return;
-  
+
   BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
 
   // We need to verify that the branch actually controls the iteration count
@@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
       (L->contains(TheBr->getSuccessor(0)) &&
        L->contains(TheBr->getSuccessor(1))))
     return;
-  
-  
+
+
   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
   // transform it.
   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
@@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   if (ExitValueVal == 0 ||
       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
     return;
-  
+
   // Find new predicate for integer comparison.
   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
   switch (Compare->getPredicate()) {
@@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   case CmpInst::FCMP_OLE:
   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
   }
-  
+
   // We convert the floating point induction variable to a signed i32 value if
   // we can.  This is only safe if the comparison will not overflow in a way
   // that won't be trapped by the integer equivalent operations.  Check for this
   // now.
   // TODO: We could use i64 if it is native and the range requires it.
-  
+
   // The start/stride/exit values must all fit in signed i32.
   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
     return;
@@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
     if (InitValue >= ExitValue ||
         NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
       return;
-    
+
     uint32_t Range = uint32_t(ExitValue-InitValue);
     if (NewPred == CmpInst::ICMP_SLE) {
       // Normalize SLE -> SLT, check for infinite loop.
       if (++Range == 0) return;  // Range overflows.
     }
-    
+
     unsigned Leftover = Range % uint32_t(IncValue);
-    
+
     // If this is an equality comparison, we require that the strided value
     // exactly land on the exit value, otherwise the IV condition will wrap
     // around and do things the fp IV wouldn't.
     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
         Leftover != 0)
       return;
-    
+
     // If the stride would wrap around the i32 before exiting, we can't
     // transform the IV.
     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
       return;
-    
+
   } else {
     // If we have a negative stride, we require the init to be greater than the
     // exit value and an equality or greater than comparison.
     if (InitValue >= ExitValue ||
         NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
       return;
-    
+
     uint32_t Range = uint32_t(InitValue-ExitValue);
     if (NewPred == CmpInst::ICMP_SGE) {
       // Normalize SGE -> SGT, check for infinite loop.
       if (++Range == 0) return;  // Range overflows.
     }
-    
+
     unsigned Leftover = Range % uint32_t(-IncValue);
-    
+
     // If this is an equality comparison, we require that the strided value
     // exactly land on the exit value, otherwise the IV condition will wrap
     // around and do things the fp IV wouldn't.
     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
         Leftover != 0)
       return;
-    
+
     // If the stride would wrap around the i32 before exiting, we can't
     // transform the IV.
     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
       return;
   }
-  
+
   const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
 
   // Insert new integer induction variable.
-  PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
+  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                       PN->getIncomingBlock(IncomingEdge));
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 90094a8..7168177 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/Loads.h"
@@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) {
         Changed = true;
         continue;
       }
-      
+
       BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
-      
+
       // Can't thread an unconditional jump, but if the block is "almost
       // empty", we can replace uses of it with uses of the successor and make
       // this dead.
@@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
 
 static bool hasAddressTakenAndUsed(BasicBlock *BB) {
   if (!BB->hasAddressTaken()) return false;
-  
+
   // If the block has its address taken, it may be a tree of dead constants
   // hanging off of it.  These shouldn't keep the block alive.
   BlockAddress *BA = BlockAddress::get(BB);
@@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     return false; // Must be an invoke.
   }
 
+  // Run constant folding to see if we can reduce the condition to a simple
+  // constant.
+  if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+    Value *SimpleVal = ConstantFoldInstruction(I, TD);
+    if (SimpleVal) {
+      I->replaceAllUsesWith(SimpleVal);
+      I->eraseFromParent();
+      Condition = SimpleVal;
+    }
+  }
+
   // If the terminator is branching on an undef, we can pick any of the
   // successors to branch to.  Let GetBestDestForJumpOnUndef decide.
   if (isa<UndefValue>(Condition)) {
@@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
 
   // Create a PHI node at the start of the block for the PRE'd load value.
-  PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+  PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+                                LoadBB->begin());
   PN->takeName(LI);
 
   // Insert new entries into the PHI for each predecessor.  A single block may
   // have multiple entries here.
-  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
-       ++PI) {
+  for (pred_iterator PI = PB; PI != PE; ++PI) {
     BasicBlock *P = *PI;
     AvailablePredsTy::iterator I =
       std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0786793..93de9cf 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) {
   // enough that we handle it as a special (more efficient) case.  It is more
   // efficient to handle because there are no PHI nodes that need to be placed.
   if (ExitBlocks.size() == 1) {
-    if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+    if (!isa<DbgInfoIntrinsic>(I) && 
+        !DT->dominates(I.getParent(), ExitBlocks[0])) {
       // Instruction is not used, just delete it.
       CurAST->deleteValue(&I);
       // If I has users in unreachable blocks, eliminate.
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
                  Preheader->getTerminator());
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
-  // Copy any value stored to or loaded from a must-alias of the pointer.
-  if (PreheaderLoad->getType()->isPointerTy()) {
-    Value *SomeValue;
-    if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
-      SomeValue = LI;
-    else
-      SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
-    
-    CurAST->copyValue(SomeValue, PreheaderLoad);
-  }
-
   // Rewrite all the loads in the loop and remember all the definitions from
   // stores in the loop.
   Promoter.run(LoopUses);
-  
-  // If the preheader load is itself a pointer, we need to tell alias analysis
-  // about the new pointer we created in the preheader block and about any PHI
-  // nodes that just got inserted.
-  if (PreheaderLoad->getType()->isPointerTy()) {
-    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
-      CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
-  }
-  
-  // fwew, we're done!
+
+  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+  if (PreheaderLoad->use_empty())
+    PreheaderLoad->eraseFromParent();
 }
 
 
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f8ce214..1366231 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -81,7 +81,7 @@ namespace {
 
     bool processLoopStore(StoreInst *SI, const SCEV *BECount);
     bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-    
+
     bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                  unsigned StoreAlignment,
                                  Value *SplatValue, Instruction *TheStore,
@@ -91,7 +91,7 @@ namespace {
                                     const SCEVAddRecExpr *StoreEv,
                                     const SCEVAddRecExpr *LoadEv,
                                     const SCEV *BECount);
-      
+
     /// This transformation requires natural loop information & requires that
     /// loop preheaders be inserted into the CFG.
     ///
@@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
 ///
 static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
   SmallVector<Instruction*, 32> NowDeadInsts;
-  
+
   NowDeadInsts.push_back(I);
-  
+
   // Before we touch this instruction, remove it from SE!
   do {
     Instruction *DeadInst = NowDeadInsts.pop_back_val();
-    
+
     // This instruction is dead, zap it, in stages.  Start by removing it from
     // SCEV.
     SE.forgetValue(DeadInst);
-    
+
     for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
       Value *Op = DeadInst->getOperand(op);
       DeadInst->setOperand(op, 0);
-      
+
       // If this operand just became dead, add it to the NowDeadInsts list.
       if (!Op->use_empty()) continue;
-      
+
       if (Instruction *OpI = dyn_cast<Instruction>(Op))
         if (isInstructionTriviallyDead(OpI))
           NowDeadInsts.push_back(OpI);
     }
-    
+
     DeadInst->eraseFromParent();
-    
+
   } while (!NowDeadInsts.empty());
 }
 
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
   CurLoop = L;
-  
+
   // The trip count of the loop must be analyzable.
   SE = &getAnalysis<ScalarEvolution>();
   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
     return false;
   const SCEV *BECount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BECount)) return false;
-  
+
   // If this loop executes exactly one time, then it should be peeled, not
   // optimized by this pass.
   if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
     if (BECst->getValue()->getValue() == 0)
       return false;
-  
+
   // We require target data for now.
   TD = getAnalysisIfAvailable<TargetData>();
   if (TD == 0) return false;
@@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
   DT = &getAnalysis<DominatorTree>();
   LoopInfo &LI = getAnalysis<LoopInfo>();
   TLI = &getAnalysis<TargetLibraryInfo>();
-  
+
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
 
   DEBUG(dbgs() << "loop-idiom Scanning: F["
                << L->getHeader()->getParent()->getName()
                << "] Loop %" << L->getHeader()->getName() << "\n");
-  
+
   bool MadeChange = false;
   // Scan all the blocks in the loop that are not in subloops.
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
@@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
     // Ignore blocks in subloops.
     if (LI.getLoopFor(*BI) != CurLoop)
       continue;
-    
+
     MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
   }
   return MadeChange;
@@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
     if (!DT->dominates(BB, ExitBlocks[i]))
       return false;
-  
+
   bool MadeChange = false;
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
     Instruction *Inst = I++;
@@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
       WeakVH InstPtr(I);
       if (!processLoopStore(SI, BECount)) continue;
       MadeChange = true;
-      
+
       // If processing the store invalidated our iterator, start over from the
       // top of the block.
       if (InstPtr == 0)
         I = BB->begin();
       continue;
     }
-    
+
     // Look for memset instructions, which may be optimized to a larger memset.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst))  {
       WeakVH InstPtr(I);
       if (!processLoopMemSet(MSI, BECount)) continue;
       MadeChange = true;
-      
+
       // If processing the memset invalidated our iterator, start over from the
       // top of the block.
       if (InstPtr == 0)
@@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
       continue;
     }
   }
-  
+
   return MadeChange;
 }
 
@@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
 
   Value *StoredVal = SI->getValueOperand();
   Value *StorePtr = SI->getPointerOperand();
-  
+
   // Reject stores that are so large that they overflow an unsigned.
   uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
   if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
     return false;
-  
+
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided store.  If we have something else, it's a
   // random store we can't handle.
@@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
 
   // Check to see if the stride matches the size of the store.  If so, then we
   // know that every byte is touched in the loop.
-  unsigned StoreSize = (unsigned)SizeInBits >> 3; 
+  unsigned StoreSize = (unsigned)SizeInBits >> 3;
   const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-  
+
   if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
     // TODO: Could also handle negative stride here someday, that will require
     // the validity check in mayLoopAccessLocation to be updated though.
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
       dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
       dbgs() << "BB: " << *SI->getParent();
     }
-    
+
     return false;
   }
 
@@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
   // If we're not allowed to hack on memset, we fail.
   if (!TLI->has(LibFunc::memset))
     return false;
-  
+
   Value *Pointer = MSI->getDest();
-  
+
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided store.  If we have something else, it's a
   // random store we can't handle.
@@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
   uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
   if ((SizeInBytes >> 32) != 0)
     return false;
-  
+
   // Check to see if the stride matches the size of the memset.  If so, then we
   // know that every byte is touched in the loop.
   const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
-  
+
   // TODO: Could also handle negative stride here someday, that will require the
   // validity check in mayLoopAccessLocation to be updated though.
   if (Stride == 0 || MSI->getLength() != Stride->getValue())
     return false;
-  
+
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                  MSI->getAlignment(), MSI->getValue(),
                                  MSI, Ev, BECount);
@@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
   // to be exactly the size of the memset, which is (BECount+1)*StoreSize
   if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
     AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
-  
+
   // TODO: For this to be really effective, we have to dive into the pointer
   // operand in the store.  Store to &A[i] of 100 will always return may alias
   // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
@@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
   // that doesn't seem worthwhile.
   Constant *C = dyn_cast<Constant>(V);
   if (C == 0) return 0;
-  
+
   // Only handle simple values that are a power of two bytes in size.
   uint64_t Size = TD.getTypeSizeInBits(V->getType());
   if (Size == 0 || (Size & 7) || (Size & (Size-1)))
     return 0;
-  
+
   // Don't care enough about darwin/ppc to implement this.
   if (TD.isBigEndian())
     return 0;
@@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
   // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
   // if the top and bottom are the same (e.g. for vectors and large integers).
   if (Size > 16) return 0;
-  
+
   // If the constant is exactly 16 bytes, just use it.
   if (Size == 16) return C;
 
@@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                         unsigned StoreAlignment, Value *StoredVal,
                         Instruction *TheStore, const SCEVAddRecExpr *Ev,
                         const SCEV *BECount) {
-  
+
   // If the stored value is a byte-wise value (like i32 -1), then it may be
   // turned into a memset of i8 -1, assuming that all the consecutive bytes
   // are stored.  A store of i32 0x01020304 can never be turned into a memset,
   // but it can be turned into memset_pattern if the target supports it.
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = 0;
-  
+
   // If we're allowed to form a memset, and the stored value would be acceptable
   // for memset, use it.
   if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
     // do anything with a 3-byte store, for example.
     return false;
   }
-  
-  
+
+
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                             CurLoop, BECount,
                             StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
     return false;
-  
+
   // Okay, everything looks good, insert the memset.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
-  
+
   IRBuilder<> Builder(Preheader->getTerminator());
-  
+
   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
   // header.  Just insert code for it in the preheader.
   SCEVExpander Expander(*SE);
-  
+
   unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
-  Value *BasePtr = 
+  Value *BasePtr =
     Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                            Preheader->getTerminator());
-  
+
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
   const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
   BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-  
+
   const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
-                                         true /*no unsigned overflow*/);
+                                         SCEV::FlagNUW);
   if (StoreSize != 1)
     NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
-                               true /*no unsigned overflow*/);
-  
-  Value *NumBytes = 
+                               SCEV::FlagNUW);
+
+  Value *NumBytes =
     Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-  
-  Value *NewCall;
+
+  CallInst *NewCall;
   if (SplatValue)
     NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
   else {
     Module *M = TheStore->getParent()->getParent()->getParent();
     Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                         Builder.getVoidTy(),
-                                        Builder.getInt8PtrTy(), 
+                                        Builder.getInt8PtrTy(),
                                         Builder.getInt8PtrTy(), IntPtr,
                                         (void*)0);
-    
+
     // Otherwise we should form a memset_pattern16.  PatternValue is known to be
     // an constant array of 16-bytes.  Plop the value into a mergable global.
     GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
@@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
     Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
     NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
   }
-  
+
   DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
                << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
-  (void)NewCall;
-  
+  NewCall->setDebugLoc(TheStore->getDebugLoc());
+
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
   DeleteDeadInstruction(TheStore, *SE);
@@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   // If we're not allowed to form memcpy, we fail.
   if (!TLI->has(LibFunc::memcpy))
     return false;
-  
+
   LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-  
+
   // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
   // this into a memcpy in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                             CurLoop, BECount, StoreSize,
                             getAnalysis<AliasAnalysis>(), SI))
     return false;
-  
+
   // Okay, everything looks good, insert the memcpy.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
-  
+
   IRBuilder<> Builder(Preheader->getTerminator());
-  
+
   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
   // header.  Just insert code for it in the preheader.
   SCEVExpander Expander(*SE);
 
-  Value *LoadBasePtr = 
+  Value *LoadBasePtr =
     Expander.expandCodeFor(LoadEv->getStart(),
                            Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                            Preheader->getTerminator());
-  Value *StoreBasePtr = 
+  Value *StoreBasePtr =
     Expander.expandCodeFor(StoreEv->getStart(),
                            Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                            Preheader->getTerminator());
-  
+
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
   const Type *IntPtr = TD->getIntPtrType(SI->getContext());
   BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-  
+
   const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
-                                         true /*no unsigned overflow*/);
+                                         SCEV::FlagNUW);
   if (StoreSize != 1)
     NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
-                               true /*no unsigned overflow*/);
-  
+                               SCEV::FlagNUW);
+
   Value *NumBytes =
     Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-  
+
   Value *NewCall =
     Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                          std::min(SI->getAlignment(), LI->getAlignment()));
-  
+
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
                << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
                << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
   (void)NewCall;
-  
+
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
   DeleteDeadInstruction(SI, *SE);
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 95e1578..47dced3 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) {
   // Now, this loop is suitable for rotation.
   BasicBlock *OrigPreheader = L->getLoopPreheader();
   BasicBlock *OrigLatch = L->getLoopLatch();
-  assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
+  
+  // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it, just give up.
+  if (OrigPreheader == 0 || OrigLatch == 0)
+    return false;
 
   // Anything ScalarEvolution may know about this loop or the PHI nodes
   // in its header will soon be invalidated.
@@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
     // We can fold the conditional branch in the preheader, this makes things
     // simpler. The first step is to remove the extra edge to the Exit block.
     Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
-    BranchInst::Create(NewHeader, PHBI);
+    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+    NewBI->setDebugLoc(PHBI->getDebugLoc());
     PHBI->eraseFromParent();
     
     // With our CFG finalized, update DomTree if it is available.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ac4aea2..5abc790 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
       DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
       DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                       AR->getStepRecurrence(SE),
-                                      AR->getLoop()),
+                                      // FIXME: AR->getNoWrapFlags()
+                                      AR->getLoop(), SCEV::FlagAnyWrap),
                      L, Good, Bad, SE);
       return;
     }
@@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                        IgnoreSignificantBits);
       if (!Start) return 0;
-      return SE.getAddRecExpr(Start, Step, AR->getLoop());
+      // FlagNW is independent of the start value, step direction, and is
+      // preserved with smaller magnitude steps.
+      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
     }
     return 0;
   }
@@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
     int64_t Result = ExtractImmediate(NewOps.front(), SE);
     if (Result != 0)
-      S = SE.getAddRecExpr(NewOps, AR->getLoop());
+      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+                           SCEV::FlagAnyWrap);
     return Result;
   }
   return 0;
@@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
     if (Result)
-      S = SE.getAddRecExpr(NewOps, AR->getLoop());
+      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+                           SCEV::FlagAnyWrap);
     return Result;
   }
   return 0;
@@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
     switch (II->getIntrinsicID()) {
       default: break;
       case Intrinsic::prefetch:
-      case Intrinsic::x86_sse2_loadu_dq:
-      case Intrinsic::x86_sse2_loadu_pd:
-      case Intrinsic::x86_sse_loadu_ps:
       case Intrinsic::x86_sse_storeu_ps:
       case Intrinsic::x86_sse2_storeu_pd:
       case Intrinsic::x86_sse2_storeu_dq:
@@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F,
   }
 }
 
-/// Loose - Set this cost to a loosing value.
+/// Loose - Set this cost to a losing value.
 void Cost::Loose() {
   NumRegs = ~0u;
   AddRecCost = ~0u;
@@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() {
     if (!C->getValue().isStrictlyPositive()) continue;
 
     /* Add new PHINode. */
-    PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
 
     /* create new increment. '++d' in above example. */
     Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
@@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() {
   }
 }
 
-/// reconcileNewOffset - Determine if the given use can accomodate a fixup
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
 /// at the given offset and other details. If so, update the use and
 /// return true.
 bool
@@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
     if (!AR->getStart()->isZero()) {
       CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                        AR->getStepRecurrence(SE),
-                                       AR->getLoop()),
+                                       AR->getLoop(),
+                                       //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+                                       SCEV::FlagAnyWrap),
                       C, Ops, L, SE);
       CollectSubexprs(AR->getStart(), C, Ops, L, SE);
       return;
@@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
   }
 }
 
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call 
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
 /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
 /// we've done more filtering, as it may be able to find more formulae to
 /// eliminate.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80b263a..fef6bc3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -43,7 +43,13 @@ namespace {
   class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopUnroll() : LoopPass(ID) {
+    LoopUnroll(int T = -1, int C = -1,  int P = -1) : LoopPass(ID) {
+      CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+      CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+      CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+      UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+     
       initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
     }
 
@@ -56,7 +62,10 @@ namespace {
     // explicit -unroll-threshold).
     static const unsigned OptSizeUnrollThreshold = 50;
     
+    unsigned CurrentCount;
     unsigned CurrentThreshold;
+    bool     CurrentAllowPartial;
+    bool     UserThreshold;        // CurrentThreshold is user-specified.
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LCSSA)
 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+  return new LoopUnroll(Threshold, Count, AllowPartial);
+}
 
 /// ApproximateLoopSize - Approximate the size of the loop.
 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
@@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   // from UnrollThreshold, it is overridden to a smaller value if the current
   // function is marked as optimize-for-size, and the unroll threshold was
   // not user specified.
-  CurrentThreshold = UnrollThreshold;
-  if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
-      UnrollThreshold.getNumOccurrences() == 0)
-    CurrentThreshold = OptSizeUnrollThreshold;
+  unsigned Threshold = CurrentThreshold;
+  if (!UserThreshold && 
+      Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+    Threshold = OptSizeUnrollThreshold;
 
   // Find trip count
   unsigned TripCount = L->getSmallConstantTripCount();
-  unsigned Count = UnrollCount;
+  unsigned Count = CurrentCount;
 
   // Automatically select an unroll count.
   if (Count == 0) {
@@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   }
 
   // Enforce the threshold.
-  if (CurrentThreshold != NoThreshold) {
+  if (Threshold != NoThreshold) {
     unsigned NumInlineCandidates;
     unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
     DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
@@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
       return false;
     }
     uint64_t Size = (uint64_t)LoopSize*Count;
-    if (TripCount != 1 && Size > CurrentThreshold) {
+    if (TripCount != 1 && Size > Threshold) {
       DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
-            << " because size: " << Size << ">" << CurrentThreshold << "\n");
-      if (!UnrollAllowPartial) {
+            << " because size: " << Size << ">" << Threshold << "\n");
+      if (!CurrentAllowPartial) {
         DEBUG(dbgs() << "  will not try to unroll partially because "
               << "-unroll-allow-partial not given\n");
         return false;
       }
       // Reduce unroll count to be modulo of TripCount for partial unrolling
-      Count = CurrentThreshold / LoopSize;
+      Count = Threshold / LoopSize;
       while (Count != 0 && TripCount%Count != 0) {
         Count--;
       }
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bde0e53..a3035cb 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <list>
 using namespace llvm;
 
@@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
 namespace {
   class MemCpyOpt : public FunctionPass {
     MemoryDependenceAnalysis *MD;
+    TargetLibraryInfo *TLI;
     const TargetData *TD;
   public:
     static char ID; // Pass identification, replacement for typeid
     MemCpyOpt() : FunctionPass(ID) {
       initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
       MD = 0;
+      TLI = 0;
+      TD = 0;
     }
 
     bool runOnFunction(Function &F);
@@ -316,6 +320,7 @@ namespace {
       AU.addRequired<DominatorTree>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetLibraryInfo>();
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
@@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                     false, false)
@@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   if (M->getSource() == MDep->getSource())
     return false;
   
-  // Second, the length of the memcpy's must be the same, or the preceeding one
+  // Second, the length of the memcpy's must be the same, or the preceding one
   // must be larger than the following one.
   ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
   ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
@@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 
+  if (!TLI->has(LibFunc::memmove))
+    return false;
+  
   // See if the pointers alias.
   if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
     return false;
@@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) {
   bool MadeChange = false;
   MD = &getAnalysis<MemoryDependenceAnalysis>();
   TD = getAnalysisIfAvailable<TargetData>();
+  TLI = &getAnalysis<TargetLibraryInfo>();
+  
+  // If we don't have at least memset and memcpy, there is little point of doing
+  // anything here.  These are required by a freestanding implementation, so if
+  // even they are disabled, there is no point in trying hard.
+  if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+    return false;
+  
   while (1) {
     if (!iterateOnFunction(F))
       break;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e093b52..c1dfe15 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,6 +22,7 @@
 
 #define DEBUG_TYPE "reassociate"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -74,6 +75,8 @@ namespace {
   class Reassociate : public FunctionPass {
     DenseMap<BasicBlock*, unsigned> RankMap;
     DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+    SmallVector<WeakVH, 8> RedoInsts;
+    SmallVector<WeakVH, 8> DeadInsts;
     bool MadeChange;
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -98,7 +101,7 @@ namespace {
     void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
     void LinearizeExpr(BinaryOperator *I);
     Value *RemoveFactorFromExpression(Value *V, Value *Factor);
-    void ReassociateBB(BasicBlock *BB);
+    void ReassociateInst(BasicBlock::iterator &BBI);
     
     void RemoveDeadBinaryOp(Value *V);
   };
@@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
 
 void Reassociate::RemoveDeadBinaryOp(Value *V) {
   Instruction *Op = dyn_cast<Instruction>(V);
-  if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty())
+  if (!Op || !isa<BinaryOperator>(Op))
     return;
   
   Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
   
   ValueRankMap.erase(Op);
-  Op->eraseFromParent();
+  DeadInsts.push_back(Op);
   RemoveDeadBinaryOp(LHS);
   RemoveDeadBinaryOp(RHS);
 }
@@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg,
   ValueRankMap.erase(Neg);
   Res->takeName(Neg);
   Neg->replaceAllUsesWith(Res);
+  Res->setDebugLoc(Neg->getDebugLoc());
   Neg->eraseFromParent();
   return Res;
 }
@@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
   // Everyone now refers to the add instruction.
   ValueRankMap.erase(Sub);
   Sub->replaceAllUsesWith(New);
+  New->setDebugLoc(Sub->getDebugLoc());
   Sub->eraseFromParent();
 
   DEBUG(dbgs() << "Negated: " << *New << '\n');
@@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl,
     ValueRankMap.erase(Shl);
     Mul->takeName(Shl);
     Shl->replaceAllUsesWith(Mul);
+    Mul->setDebugLoc(Shl->getDebugLoc());
     Shl->eraseFromParent();
     return Mul;
   }
@@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
   // remaining operand.
   if (Factors.size() == 1) {
     ValueRankMap.erase(BO);
-    BO->eraseFromParent();
+    DeadInsts.push_back(BO);
     V = Factors[0].Op;
   } else {
     RewriteExprTree(BO, Factors);
@@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
       // Now that we have inserted a multiply, optimize it. This allows us to
       // handle cases that require multiple factoring steps, such as this:
       // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
-      Mul = ReassociateExpression(cast<BinaryOperator>(Mul));
+      RedoInsts.push_back(Mul);
       
       // If every add operand was a duplicate, return the multiply.
       if (Ops.empty())
@@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
 }
 
 
-/// ReassociateBB - Inspect all of the instructions in this basic block,
-/// reassociating them as we go.
-void Reassociate::ReassociateBB(BasicBlock *BB) {
-  for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
-    Instruction *BI = BBI++;
-    if (BI->getOpcode() == Instruction::Shl &&
-        isa<ConstantInt>(BI->getOperand(1)))
-      if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
-        MadeChange = true;
-        BI = NI;
-      }
+/// ReassociateInst - Inspect and reassociate the instruction at the
+/// given position, post-incrementing the position.
+void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
+  Instruction *BI = BBI++;
+  if (BI->getOpcode() == Instruction::Shl &&
+      isa<ConstantInt>(BI->getOperand(1)))
+    if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+      MadeChange = true;
+      BI = NI;
+    }
 
-    // Reject cases where it is pointless to do this.
-    if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || 
-        BI->getType()->isVectorTy())
-      continue;  // Floating point ops are not associative.
-
-    // Do not reassociate boolean (i1) expressions.  We want to preserve the
-    // original order of evaluation for short-circuited comparisons that
-    // SimplifyCFG has folded to AND/OR expressions.  If the expression
-    // is not further optimized, it is likely to be transformed back to a
-    // short-circuited form for code gen, and the source order may have been
-    // optimized for the most likely conditions.
-    if (BI->getType()->isIntegerTy(1))
-      continue;
+  // Reject cases where it is pointless to do this.
+  if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || 
+      BI->getType()->isVectorTy())
+    return;  // Floating point ops are not associative.
+
+  // Do not reassociate boolean (i1) expressions.  We want to preserve the
+  // original order of evaluation for short-circuited comparisons that
+  // SimplifyCFG has folded to AND/OR expressions.  If the expression
+  // is not further optimized, it is likely to be transformed back to a
+  // short-circuited form for code gen, and the source order may have been
+  // optimized for the most likely conditions.
+  if (BI->getType()->isIntegerTy(1))
+    return;
 
-    // If this is a subtract instruction which is not already in negate form,
-    // see if we can convert it to X+-Y.
-    if (BI->getOpcode() == Instruction::Sub) {
-      if (ShouldBreakUpSubtract(BI)) {
-        BI = BreakUpSubtract(BI, ValueRankMap);
-        // Reset the BBI iterator in case BreakUpSubtract changed the
-        // instruction it points to.
-        BBI = BI;
-        ++BBI;
+  // If this is a subtract instruction which is not already in negate form,
+  // see if we can convert it to X+-Y.
+  if (BI->getOpcode() == Instruction::Sub) {
+    if (ShouldBreakUpSubtract(BI)) {
+      BI = BreakUpSubtract(BI, ValueRankMap);
+      // Reset the BBI iterator in case BreakUpSubtract changed the
+      // instruction it points to.
+      BBI = BI;
+      ++BBI;
+      MadeChange = true;
+    } else if (BinaryOperator::isNeg(BI)) {
+      // Otherwise, this is a negation.  See if the operand is a multiply tree
+      // and if this is not an inner node of a multiply tree.
+      if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+          (!BI->hasOneUse() ||
+           !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+        BI = LowerNegateToMultiply(BI, ValueRankMap);
         MadeChange = true;
-      } else if (BinaryOperator::isNeg(BI)) {
-        // Otherwise, this is a negation.  See if the operand is a multiply tree
-        // and if this is not an inner node of a multiply tree.
-        if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
-            (!BI->hasOneUse() ||
-             !isReassociableOp(BI->use_back(), Instruction::Mul))) {
-          BI = LowerNegateToMultiply(BI, ValueRankMap);
-          MadeChange = true;
-        }
       }
     }
+  }
 
-    // If this instruction is a commutative binary operator, process it.
-    if (!BI->isAssociative()) continue;
-    BinaryOperator *I = cast<BinaryOperator>(BI);
+  // If this instruction is a commutative binary operator, process it.
+  if (!BI->isAssociative()) return;
+  BinaryOperator *I = cast<BinaryOperator>(BI);
 
-    // If this is an interior node of a reassociable tree, ignore it until we
-    // get to the root of the tree, to avoid N^2 analysis.
-    if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
-      continue;
+  // If this is an interior node of a reassociable tree, ignore it until we
+  // get to the root of the tree, to avoid N^2 analysis.
+  if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+    return;
 
-    // If this is an add tree that is used by a sub instruction, ignore it 
-    // until we process the subtract.
-    if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
-        cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
-      continue;
+  // If this is an add tree that is used by a sub instruction, ignore it 
+  // until we process the subtract.
+  if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+    return;
 
-    ReassociateExpression(I);
-  }
+  ReassociateExpression(I);
 }
 
 Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
@@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
     // eliminate it.
     DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
     I->replaceAllUsesWith(V);
+    if (Instruction *VI = dyn_cast<Instruction>(V))
+      VI->setDebugLoc(I->getDebugLoc());
     RemoveDeadBinaryOp(I);
     ++NumAnnihil;
     return V;
@@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
     // This expression tree simplified to something that isn't a tree,
     // eliminate it.
     I->replaceAllUsesWith(Ops[0].Op);
+    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+      OI->setDebugLoc(I->getDebugLoc());
     RemoveDeadBinaryOp(I);
     return Ops[0].Op;
   }
@@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) {
 
   MadeChange = false;
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
-    ReassociateBB(FI);
+    for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
+      ReassociateInst(BBI);
+
+  // Now that we're done, revisit any instructions which are likely to
+  // have secondary reassociation opportunities.
+  while (!RedoInsts.empty())
+    if (Value *V = RedoInsts.pop_back_val()) {
+      BasicBlock::iterator BBI = cast<Instruction>(V);
+      ReassociateInst(BBI);
+    }
+
+  // Now that we're done, delete any instructions which are no longer used.
+  while (!DeadInsts.empty())
+    if (Value *V = DeadInsts.pop_back_val())
+      RecursivelyDeleteTriviallyDeadInstructions(V);
 
   // We are done with the rank map.
   RankMap.clear();
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 459bb06..47afc77 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -9,7 +9,7 @@
 //
 // This file demotes all registers to memory references.  It is intented to be
 // the inverse of PromoteMemoryToRegister.  By converting to loads, the only
-// values live accross basic blocks are allocas and loads before phi nodes.
+// values live across basic blocks are allocas and loads before phi nodes.
 // It is intended that this should make CFG hacking much easier.
 // To make later hacking easier, the entry block is split into two, such that
 // all introduced allocas and nothing else are in the entry block.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index c82e929..db8eb85 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) {
     ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
   }
     
-  // If we infered constant or undef values for globals variables, we can delete
+  // If we inferred constant or undef values for globals variables, we can delete
   // the global and any stores that remain to it.
   const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
   for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index bf9ca6d..32a0506 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -17,6 +17,7 @@
 #include "llvm-c/Initialization.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Scalar.h"
@@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeDCEPass(Registry);
   initializeDeadInstEliminationPass(Registry);
   initializeDSEPass(Registry);
-  initializeGEPSplitterPass(Registry);
   initializeGVNPass(Registry);
   initializeEarlyCSEPass(Registry);
   initializeIndVarSimplifyPass(Registry);
@@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeSROA_DTPass(Registry);
   initializeSROA_SSAUpPass(Registry);
   initializeCFGSimplifyPassPass(Registry);
-  initializeSimplifyHalfPowrLibCallsPass(Registry);
   initializeSimplifyLibCallsPass(Registry);
   initializeSinkingPass(Registry);
   initializeTailDupPass(Registry);
@@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopDeletionPass());
 }
 
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createLoopIdiomPass());
+}
+
 void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopRotatePass());
 }
@@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createScalarReplAggregatesPass());
 }
 
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
                                                   int Threshold) {
   unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
@@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
 void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createVerifierPass());
 }
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c3ca852..8178c27 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -219,7 +219,7 @@ namespace {
 /// optimization, which scans the uses of an alloca and determines if it can
 /// rewrite it in terms of a single new alloca that can be mem2reg'd.
 class ConvertToScalarInfo {
-  /// AllocaSize - The size of the alloca being considered.
+  /// AllocaSize - The size of the alloca being considered in bytes.
   unsigned AllocaSize;
   const TargetData &TD;
 
@@ -238,19 +238,22 @@ class ConvertToScalarInfo {
   /// also declared as a vector, we do want to promote to a vector.
   bool HadAVector;
 
+  /// HadNonMemTransferAccess - True if there is at least one access to the 
+  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
+  /// large integers unless there is some potential for optimization.
+  bool HadNonMemTransferAccess;
+
 public:
   explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
-    : AllocaSize(Size), TD(td) {
-    IsNotTrivial = false;
-    VectorTy = 0;
-    HadAVector = false;
-  }
+    : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+      HadAVector(false), HadNonMemTransferAccess(false) { }
 
   AllocaInst *TryConvert(AllocaInst *AI);
 
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset);
+  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+  bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
 
   Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
@@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
           << *VectorTy << '\n');
     NewTy = VectorTy;  // Use the vector type.
   } else {
+    unsigned BitWidth = AllocaSize * 8;
+    if (!HadAVector && !HadNonMemTransferAccess &&
+        !TD.fitsInLegalInteger(BitWidth))
+      return 0;
+
     DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
     // Create and insert the integer alloca.
-    NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+    NewTy = IntegerType::get(AI->getContext(), BitWidth);
   }
   AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
   ConvertUsesToScalar(AI, NewAI, 0);
@@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
 /// so far at the offset specified by Offset (which is specified in bytes).
 ///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
 ///   1) A union of vector types of the same size and potentially its elements.
 ///      Here we turn element accesses into insert/extract element operations.
 ///      This promotes a <4 x float> with a store of float to the third element
 ///      into a <4 x float> that uses insert element.
-///   2) A fully general blob of memory, which we turn into some (potentially
+///   2) A union of vector types with power-of-2 size differences, e.g. a float,
+///      <2 x float> and <4 x float>.  Here we turn element accesses into insert
+///      and extract element operations, and <2 x float> accesses into a cast to
+///      <2 x double>, an extract, and a cast back to <2 x float>.
+///   3) A fully general blob of memory, which we turn into some (potentially
 ///      large) integer type with extract and insert operations where the loads
 ///      and stores would mutate the memory.  We mark this by setting VectorTy
 ///      to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+                                      bool IsLoadOrStore) {
   // If we already decided to turn this into a blob of integer memory, there is
   // nothing to be done.
   if (VectorTy && VectorTy->isVoidTy())
@@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   // If the In type is a vector that is the same size as the alloca, see if it
   // matches the existing VecTy.
   if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
-    // Remember if we saw a vector type.
-    HadAVector = true;
-
-    if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
-      // If we're storing/loading a vector of the right size, allow it as a
-      // vector.  If this the first vector we see, remember the type so that
-      // we know the element size.  If this is a subsequent access, ignore it
-      // even if it is a differing type but the same size.  Worst case we can
-      // bitcast the resultant vectors.
-      if (VectorTy == 0)
-        VectorTy = VInTy;
+    if (MergeInVectorType(VInTy, Offset))
       return;
-    }
   } else if (In->isFloatTy() || In->isDoubleTy() ||
              (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
               isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+    // Full width accesses can be ignored, because they can always be turned
+    // into bitcasts.
+    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+    if (IsLoadOrStore && EltSize == AllocaSize)
+      return;
+
     // If we're accessing something that could be an element of a vector, see
     // if the implied vector agrees with what we already have and if Offset is
     // compatible with it.
-    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
-    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
-        (VectorTy == 0 ||
-         cast<VectorType>(VectorTy)->getElementType()
-               ->getPrimitiveSizeInBits()/8 == EltSize)) {
-      if (VectorTy == 0)
+    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) {
+      if (!VectorTy) {
         VectorTy = VectorType::get(In, AllocaSize/EltSize);
-      return;
+        return;
+      }
+
+      unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+                                ->getPrimitiveSizeInBits()/8;
+      if (EltSize == CurrentEltSize)
+        return;
+
+      if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+        return;
     }
   }
 
@@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   VectorTy = Type::getVoidTy(In->getContext());
 }
 
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+                                            uint64_t Offset) {
+  // Remember if we saw a vector type.
+  HadAVector = true;
+
+  // TODO: Support nonzero offsets?
+  if (Offset != 0)
+    return false;
+
+  // Only allow vectors that are a power-of-2 away from the size of the alloca.
+  if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+    return false;
+
+  // If this the first vector we see, remember the type so that we know the
+  // element size.
+  if (!VectorTy) {
+    VectorTy = VInTy;
+    return true;
+  }
+
+  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  unsigned InBitWidth = VInTy->getBitWidth();
+
+  // Vectors of the same size can be converted using a simple bitcast.
+  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+    return true;
+
+  const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+  const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+  // Do not allow mixed integer and floating-point accesses from vectors of
+  // different sizes.
+  if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+    return false;
+
+  if (ElementTy->isFloatingPointTy()) {
+    // Only allow floating-point vectors of different sizes if they have the
+    // same element type.
+    // TODO: This could be loosened a bit, but would anything benefit?
+    if (ElementTy != InElementTy)
+      return false;
+
+    // There are no arbitrary-precision floating-point types, which limits the
+    // number of legal vector types with larger element types that we can form
+    // to bitcast and extract a subvector.
+    // TODO: We could support some more cases with mixed fp128 and double here.
+    if (!(BitWidth == 64 || BitWidth == 128) ||
+        !(InBitWidth == 64 || InBitWidth == 128))
+      return false;
+  } else {
+    assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+                                       "or floating-point.");
+    unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+    unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+    // Do not allow integer types smaller than a byte or types whose widths are
+    // not a multiple of a byte.
+    if (BitWidth < 8 || InBitWidth < 8 ||
+        BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+      return false;
+  }
+
+  // Pick the largest of the two vector types.
+  if (InBitWidth > BitWidth)
+    VectorTy = VInTy;
+
+  return true;
+}
+
 /// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
 /// its accesses to a single vector type, return true and set VecTy to
 /// the new type.  If we could convert the alloca into a single promotable
@@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       // Don't touch MMX operations.
       if (LI->getType()->isX86_MMXTy())
         return false;
-      MergeInType(LI->getType(), Offset);
+      HadNonMemTransferAccess = true;
+      MergeInType(LI->getType(), Offset, true);
       continue;
     }
 
@@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       // Don't touch MMX operations.
       if (SI->getOperand(0)->getType()->isX86_MMXTy())
         return false;
-      MergeInType(SI->getOperand(0)->getType(), Offset);
+      HadNonMemTransferAccess = true;
+      MergeInType(SI->getOperand(0)->getType(), Offset, true);
       continue;
     }
 
@@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (!CanConvertToScalar(GEP, Offset+GEPOffset))
         return false;
       IsNotTrivial = true;  // Can't be mem2reg'd.
+      HadNonMemTransferAccess = true;
       continue;
     }
 
@@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
           !isa<ConstantInt>(MSI->getLength()))
         return false;
       IsNotTrivial = true;  // Can't be mem2reg'd.
+      HadNonMemTransferAccess = true;
       continue;
     }
 
@@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
   }
 }
 
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+                                        unsigned NewBitWidth) {
+  bool IsFP1 = Ty1->isFloatingPointTy() ||
+               (Ty1->isVectorTy() &&
+                cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+  bool IsFP2 = Ty2->isFloatingPointTy() ||
+               (Ty2->isVectorTy() &&
+                cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+  LLVMContext &Context = Ty1->getContext();
+
+  // Prefer floating-point types over integer types, as integer types may have
+  // been created by earlier scalar replacement.
+  if (IsFP1 || IsFP2) {
+    if (NewBitWidth == 32)
+      return Type::getFloatTy(Context);
+    if (NewBitWidth == 64)
+      return Type::getDoubleTy(Context);
+  }
+
+  return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+                                      IRBuilder<> &Builder) {
+  const Type *FromType = FromVal->getType();
+  const VectorType *FromVTy = cast<VectorType>(FromType);
+  const VectorType *ToVTy = cast<VectorType>(ToType);
+  assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+         "Vectors must have the same element type");
+   Value *UnV = UndefValue::get(FromType);
+   unsigned numEltsFrom = FromVTy->getNumElements();
+   unsigned numEltsTo = ToVTy->getNumElements();
+
+   SmallVector<Constant*, 3> Args;
+   const Type* Int32Ty = Builder.getInt32Ty();
+   unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+   unsigned i;
+   for (i=0; i != minNumElts; ++i)
+     Args.push_back(ConstantInt::get(Int32Ty, i));
+
+   if (i < numEltsTo) {
+     Constant* UnC = UndefValue::get(Int32Ty);
+     for (; i != numEltsTo; ++i)
+       Args.push_back(UnC);
+   }
+   Constant *Mask = ConstantVector::get(Args);
+   return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
 /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
 /// or vector value FromVal, extracting the bits from the offset specified by
 /// Offset.  This returns the value, which is of type ToType.
@@ -589,14 +734,46 @@ Value *ConvertToScalarInfo::
 ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                            uint64_t Offset, IRBuilder<> &Builder) {
   // If the load is of the whole new alloca, no conversion is needed.
-  if (FromVal->getType() == ToType && Offset == 0)
+  const Type *FromType = FromVal->getType();
+  if (FromType == ToType && Offset == 0)
     return FromVal;
 
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
-  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
-    if (ToType->isVectorTy())
-      return Builder.CreateBitCast(FromVal, ToType, "tmp");
+  if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+    if (ToTypeSize == AllocaSize) {
+      // If the two types have the same primitive size, use a bit cast.
+      // Otherwise, it is two vectors with the same element type that has
+      // the same allocation size but different number of elements so use
+      // a shuffle vector.
+      if (FromType->getPrimitiveSizeInBits() ==
+          ToType->getPrimitiveSizeInBits())
+        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+      else
+        return CreateShuffleVectorCast(FromVal, ToType, Builder);
+    }
+
+    if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+      assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+             "of a smaller vector type at a nonzero offset.");
+
+      const Type *CastElementTy = getScaledElementType(FromType, ToType,
+                                                       ToTypeSize * 8);
+      unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+      LLVMContext &Context = FromVal->getContext();
+      const Type *CastTy = VectorType::get(CastElementTy,
+                                           NumCastVectorElements);
+      Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+      unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+      unsigned Elt = Offset/EltSize;
+      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+      Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+                                        Type::getInt32Ty(Context), Elt), "tmp");
+      return Builder.CreateBitCast(Extract, ToType, "tmp");
+    }
 
     // Otherwise it must be an element access.
     unsigned Elt = 0;
@@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
 
     // Changing the whole vector with memset or with an access of a different
     // vector type?
-    if (ValSize == VecSize)
-      return Builder.CreateBitCast(SV, AllocaType, "tmp");
+    if (ValSize == VecSize) {
+      // If the two types have the same primitive size, use a bit cast.
+      // Otherwise, it is two vectors with the same element type that has
+      // the same allocation size but different number of elements so use
+      // a shuffle vector.
+      if (VTy->getPrimitiveSizeInBits() ==
+          SV->getType()->getPrimitiveSizeInBits())
+        return Builder.CreateBitCast(SV, AllocaType, "tmp");
+      else
+        return CreateShuffleVectorCast(SV, VTy, Builder);
+    }
 
-    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+    if (isPowerOf2_64(VecSize / ValSize)) {
+      assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+             "value of a smaller vector type at a nonzero offset.");
 
-    // Must be an element insertion.
-    unsigned Elt = Offset/EltSize;
+      const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+                                                       ValSize);
+      unsigned NumCastVectorElements = VecSize / ValSize;
 
-    if (SV->getType() != VTy->getElementType())
-      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+      LLVMContext &Context = SV->getContext();
+      const Type *OldCastTy = VectorType::get(CastElementTy,
+                                              NumCastVectorElements);
+      Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
 
-    SV = Builder.CreateInsertElement(Old, SV,
+      Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+      unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+      unsigned Elt = Offset/EltSize;
+      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+      Value *Insert =
+        Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+                                        Type::getInt32Ty(Context), Elt), "tmp");
+      return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+    }
+
+    // Must be an element insertion.
+    assert(SV->getType() == VTy->getElementType());
+    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+    unsigned Elt = Offset/EltSize;
+    return Builder.CreateInsertElement(Old, SV,
                      ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                      "tmp");
-    return SV;
   }
 
   // If SV is a first-class aggregate value, insert each value recursively.
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
     }
     
     const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
-    PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+                                     PN->getName()+".ld", PN);
 
     // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
     // matter which one we get and if any differ, it doesn't matter.
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ce5dd73..1137c2b 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   if (UseLLVMTrap) {
     Function *TrapFn =
       Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
-    CallInst::Create(TrapFn, "", I);
+    CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+    CallTrap->setDebugLoc(I->getDebugLoc());
   }
   new UnreachableInst(I->getContext(), I);
   
@@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
     PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
     if (RetBlockPHI == 0) {
       Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
-      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+      pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+                                    std::distance(PB, PE), "merge",
                                     &RetBlock->front());
       
-      for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
-           PI != E; ++PI)
+      for (pred_iterator PI = PB; PI != PE; ++PI)
         RetBlockPHI->addIncoming(InVal, *PI);
       RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
     }
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
deleted file mode 100644
index 70ff32e..0000000
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple pass that applies an experimental
-// transformation on calls to specific functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls-halfpowr"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-namespace {
-  /// This pass optimizes well half_powr function calls.
-  ///
-  class SimplifyHalfPowrLibCalls : public FunctionPass {
-    const TargetData *TD;
-  public:
-    static char ID; // Pass identification
-    SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
-      initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool runOnFunction(Function &F);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    }
-
-    Instruction *
-    InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
-                    Instruction *InsertPt);
-  };
-  char SimplifyHalfPowrLibCalls::ID = 0;
-} // end anonymous namespace.
-
-INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
-                "Simplify half_powr library calls", false, false)
-
-// Public interface to the Simplify HalfPowr LibCalls pass.
-FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
-  return new SimplifyHalfPowrLibCalls(); 
-}
-
-/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
-/// their control flow to better facilitate subsequent optimization.
-Instruction *
-SimplifyHalfPowrLibCalls::
-InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
-                Instruction *InsertPt) {
-  std::vector<BasicBlock *> Bodies;
-  BasicBlock *NewBlock = 0;
-
-  for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
-    CallInst *Call = cast<CallInst>(HalfPowrs[i]);
-    Function *Callee = Call->getCalledFunction();
-
-    // Minimally sanity-check the CFG of half_powr to ensure that it contains
-    // the kind of code we expect.  If we're running this pass, we have
-    // reason to believe it will be what we expect.
-    Function::iterator I = Callee->begin();
-    BasicBlock *Prologue = I++;
-    if (I == Callee->end()) break;
-    BasicBlock *SubnormalHandling = I++;
-    if (I == Callee->end()) break;
-    BasicBlock *Body = I++;
-    if (I != Callee->end()) break;
-    if (SubnormalHandling->getSinglePredecessor() != Prologue)
-      break;
-    BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
-    if (!PBI || !PBI->isConditional())
-      break;
-    BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
-    if (!SNBI || SNBI->isConditional())
-      break;
-    if (!isa<ReturnInst>(Body->getTerminator()))
-      break;
-
-    Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
-
-    // Inline the call, taking care of what code ends up where.
-    NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
-
-    InlineFunctionInfo IFI(0, TD);
-    bool B = InlineFunction(Call, IFI);
-    assert(B && "half_powr didn't inline?");
-    (void)B;
-
-    BasicBlock *NewBody = NewBlock->getSinglePredecessor();
-    assert(NewBody);
-    Bodies.push_back(NewBody);
-  }
-
-  if (!NewBlock)
-    return InsertPt;
-
-  // Put the code for all the bodies into one block, to facilitate
-  // subsequent optimization.
-  (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
-  for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
-    BasicBlock *Body = Bodies[i];
-    Instruction *FNP = Body->getFirstNonPHI();
-    // Splice the insts from body into NewBlock.
-    NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
-                                   FNP, Body->getTerminator());
-  }
-
-  return NewBlock->begin();
-}
-
-/// runOnFunction - Top level algorithm.
-///
-bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
-  TD = getAnalysisIfAvailable<TargetData>();
-  
-  bool Changed = false;
-  std::vector<Instruction *> HalfPowrs;
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      // Look for calls.
-      bool IsHalfPowr = false;
-      if (CallInst *CI = dyn_cast<CallInst>(I)) {
-        // Look for direct calls and calls to non-external functions.
-        Function *Callee = CI->getCalledFunction();
-        if (Callee && Callee->hasExternalLinkage()) {
-          // Look for calls with well-known names.
-          if (Callee->getName() == "__half_powrf4")
-            IsHalfPowr = true;
-        }
-      }
-      if (IsHalfPowr)
-        HalfPowrs.push_back(I);
-      // We're looking for sequences of up to three such calls, which we'll
-      // simplify as a group.
-      if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
-        I = InlineHalfPowrs(HalfPowrs, I);
-        E = I->getParent()->end();
-        HalfPowrs.clear();
-        Changed = true;
-      }
-    }
-    assert(HalfPowrs.empty() && "Block had no terminator!");
-  }
-
-  return Changed;
-}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9f136d4..6247b03 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -49,6 +49,7 @@ class LibCallOptimization {
 protected:
   Function *Caller;
   const TargetData *TD;
+  const TargetLibraryInfo *TLI;
   LLVMContext* Context;
 public:
   LibCallOptimization() { }
@@ -62,9 +63,11 @@ public:
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
     =0;
 
-  Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
+  Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+                      const TargetLibraryInfo *TLI, IRBuilder<> &B) {
     Caller = CI->getParent()->getParent();
     this->TD = TD;
+    this->TLI = TLI;
     if (CI->getCalledFunction())
       Context = &CI->getCalledFunction()->getContext();
 
@@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
   }
   return true;
 }
+ 
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+  for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+       it != e; ++it) {
+    if ((*it)->getType()->isFloatingPointTy())
+      return true;
+  }
+  return false;
+}
 
 /// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
 /// comparisons with With.
@@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization {
 // 'printf' Optimizations
 
 struct PrintFOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require one fixed pointer argument and an integer/void result.
-    const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
-        !(FT->getReturnType()->isIntegerTy() ||
-          FT->getReturnType()->isVoidTy()))
-      return 0;
-
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
     // Check for a fixed format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
@@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization {
     }
     return 0;
   }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // printf(format, ...) -> iprintf(format, ...) if no floating point
+    // arguments.
+    if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *IPrintFFn =
+        M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(IPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
 };
 
 //===---------------------------------------===//
 // 'sprintf' Optimizations
 
 struct SPrintFOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed pointer arguments and an integer result.
-    const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
     // Check for a fixed format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization {
     }
     return 0;
   }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+    // point arguments.
+    if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *SIPrintFFn =
+        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(SIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
 };
 
 //===---------------------------------------===//
@@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization {
 // 'fprintf' Optimizations
 
 struct FPrintFOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed paramters as pointers and integer result.
-    const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
     // All the optimizations depend on the format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization {
     }
     return 0;
   }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed paramters as pointers and integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
+    if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *FIPrintFFn =
+        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(FIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
 };
 
 //===---------------------------------------===//
@@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
       // Set the builder to the instruction after the call.
       Builder.SetInsertPoint(BB, I);
 
+      // Use debug location of CI for all new instructions.
+      Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
       // Try to optimize this call.
-      Value *Result = LCO->OptimizeCall(CI, TD, Builder);
+      Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
       if (Result == 0) continue;
 
       DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5b6bc04..539cc6f 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -36,7 +36,7 @@
 //     evaluated each time through the tail recursion.  Safely keeping allocas
 //     in the entry block requires analysis to proves that the tail-called
 //     function does not read or write the stack object.
-//  2. Tail recursion is only performed if the call immediately preceeds the
+//  2. Tail recursion is only performed if the call immediately precedes the
 //     return instruction.  It's possible that there could be a jump between
 //     the call and the return.
 //  3. There can be intervening operations between the call and the return that
@@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
     if (CanMoveAboveCall(BBI, CI)) continue;
     
     // If we can't move the instruction above the call, it might be because it
-    // is an associative and commutative operation that could be tranformed
+    // is an associative and commutative operation that could be transformed
     // using accumulator recursion elimination.  Check to see if this is the
     // case, and if so, remember the initial accumulator value for later.
     if ((AccumulatorRecursionEliminationInitVal =
@@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
     Instruction *InsertPos = OldEntry->begin();
     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
          I != E; ++I) {
-      PHINode *PN = PHINode::Create(I->getType(),
+      PHINode *PN = PHINode::Create(I->getType(), 2,
                                     I->getName() + ".tr", InsertPos);
       I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
       PN->addIncoming(I, NewEntry);
@@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
   if (AccumulatorRecursionEliminationInitVal) {
     Instruction *AccRecInstr = AccumulatorRecursionInstr;
     // Start by inserting a new PHI node for the accumulator.
+    pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
     PHINode *AccPN =
       PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+                      std::distance(PB, PE) + 1,
                       "accumulator.tr", OldEntry->begin());
 
     // Loop over all of the predecessors of the tail recursion block.  For the
@@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
     // other tail recursions eliminated) the accumulator is not modified.
     // Because we haven't added the branch in the current block to OldEntry yet,
     // it will not show up as a predecessor.
-    for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
-         PI != PE; ++PI) {
+    for (pred_iterator PI = PB; PI != PE; ++PI) {
       BasicBlock *P = *PI;
       if (P == &F->getEntryBlock())
         AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
@@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
 
   // Now that all of the PHI nodes are in place, remove the call and
   // ret instructions, replacing them with an unconditional branch.
-  BranchInst::Create(OldEntry, Ret);
+  BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+  NewBI->setDebugLoc(CI->getDebugLoc());
+
   BB->getInstList().erase(Ret);  // Remove return.
   BB->getInstList().erase(CI);   // Remove call.
   ++NumEliminated;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index acaea19..c705cc5 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -447,7 +447,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
       // If the values coming into the block are not the same, we need a PHI.
       // Create the new PHI node, insert it into NewBB at the end of the block
       PHINode *NewPHI =
-        PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
+        PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
       if (AA) AA->copyValue(PN, NewPHI);
       
       // Move all of the PHI values for 'Preds' to the new PHI.
@@ -538,3 +538,15 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
   UncondBranch->eraseFromParent();
   return cast<ReturnInst>(NewRet);
 }
+
+/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a 
+/// given basic block.
+DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) {
+  for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); 
+       BI != BE; ++BI) {
+    DebugLoc DL = BI->getDebugLoc();
+    if (!DL.isUnknown())
+      return DL;
+  }
+  return DebugLoc();
+}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 616b066..caf2aeb 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -56,7 +56,7 @@ char BreakCriticalEdges::ID = 0;
 INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
                 "Break critical edges in CFG", false, false)
 
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
 char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
 FunctionPass *llvm::createBreakCriticalEdgesPass() {
   return new BreakCriticalEdges();
@@ -140,7 +140,7 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
       if (VP->getParent() == SplitBB)
         continue;
     // Otherwise a new PHI is needed. Create one and populate it.
-    PHINode *NewPN = PHINode::Create(PN->getType(), "split",
+    PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split",
                                      SplitBB->getTerminator());
     for (unsigned i = 0, e = Preds.size(); i != e; ++i)
       NewPN->addIncoming(V, Preds[i]);
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index e633772..8c133ea 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -104,7 +104,7 @@ namespace {
 /// region, we need to split the entry block of the region so that the PHI node
 /// is easier to deal with.
 void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
-  bool HasPredsFromRegion = false;
+  unsigned NumPredsFromRegion = 0;
   unsigned NumPredsOutsideRegion = 0;
 
   if (Header != &Header->getParent()->getEntryBlock()) {
@@ -116,7 +116,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
     // header block into two.
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
       if (BlocksToExtract.count(PN->getIncomingBlock(i)))
-        HasPredsFromRegion = true;
+        ++NumPredsFromRegion;
       else
         ++NumPredsOutsideRegion;
 
@@ -147,7 +147,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
 
   // Okay, now we need to adjust the PHI nodes and any branches from within the
   // region to go to the new header block instead of the old header block.
-  if (HasPredsFromRegion) {
+  if (NumPredsFromRegion) {
     PHINode *PN = cast<PHINode>(OldPred->begin());
     // Loop over all of the predecessors of OldPred that are in the region,
     // changing them to branch to NewBB instead.
@@ -157,14 +157,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
         TI->replaceUsesOfWith(OldPred, NewBB);
       }
 
-    // Okay, everthing within the region is now branching to the right block, we
+    // Okay, everything within the region is now branching to the right block, we
     // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
     for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
       PHINode *PN = cast<PHINode>(AfterPHIs);
       // Create a new PHI node in the new region, which has an incoming value
       // from OldPred of PN.
-      PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce",
-                                       NewBB->begin());
+      PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
+                                       PN->getName()+".ce", NewBB->begin());
       NewPN->addIncoming(PN, OldPred);
 
       // Loop over all of the incoming value in PN, moving them to NewPN if they
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index c1faf24..7d17909 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -320,7 +320,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
 //
 // Note that this only does one level of inlining.  For example, if the
 // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-// exists in the instruction stream.  Similiarly this will inline a recursive
+// exists in the instruction stream.  Similarly this will inline a recursive
 // function by one level.
 //
 bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
@@ -624,7 +624,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
     if (!TheCall->use_empty()) {
-      PHI = PHINode::Create(RTy, TheCall->getName(),
+      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                             AfterCallBB->begin());
       // Anything that used the result of the function call should now use the
       // PHI node as their operand.
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index b2e5fa6..b654111 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -207,6 +207,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
 
   DomTreeNode *DomNode = DT->getNode(DomBB);
 
+  SmallVector<PHINode*, 16> AddedPHIs;
+
   SSAUpdater SSAUpdate;
   SSAUpdate.Initialize(Inst->getType(), Inst->getName());
   
@@ -220,9 +222,10 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
     // If we already inserted something for this BB, don't reprocess it.
     if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
     
-    PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+    PHINode *PN = PHINode::Create(Inst->getType(),
+                                  PredCache.GetNumPreds(ExitBB),
+                                  Inst->getName()+".lcssa",
                                   ExitBB->begin());
-    PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
 
     // Add inputs from inside the loop for this PHI.
     for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
@@ -236,6 +239,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
           &PN->getOperandUse(
             PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
     }
+
+    AddedPHIs.push_back(PN);
     
     // Remember that this phi makes the value alive in this block.
     SSAUpdate.AddAvailableValue(ExitBB, PN);
@@ -262,6 +267,12 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
     // Otherwise, do full PHI insertion.
     SSAUpdate.RewriteUse(*UsesToRewrite[i]);
   }
+
+  // Remove PHI nodes that did not have any uses rewritten.
+  for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
+    if (AddedPHIs[i]->use_empty())
+      AddedPHIs[i]->eraseFromParent();
+  }
   
   return true;
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 3f789fa..4bca2fc 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,8 +20,11 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -65,8 +68,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
 
       // Let the basic block know that we are letting go of it.  Based on this,
       // it will adjust it's PHI nodes.
-      assert(BI->getParent() && "Terminator not inserted in block!");
-      OldDest->removePredecessor(BI->getParent());
+      OldDest->removePredecessor(BB);
 
       // Replace the conditional branch with an unconditional one.
       BranchInst::Create(Destination, BI);
@@ -209,8 +211,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
 bool llvm::isInstructionTriviallyDead(Instruction *I) {
   if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
 
-  // We don't want debug info removed by anything this general.
-  if (isa<DbgInfoIntrinsic>(I)) return false;
+  // We don't want debug info removed by anything this general, unless
+  // debug info is empty.
+  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+    if (DDI->getAddress()) 
+      return false;
+    return true;
+  } 
+  if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+    if (DVI->getValue())
+      return false;
+    return true;
+  }
 
   if (!I->mayHaveSideEffects()) return true;
 
@@ -320,8 +332,14 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
         BI = BB->begin();
       continue;
     }
-    
+
+    if (Inst->isTerminator())
+      break;
+
+    WeakVH BIHandle(BI);
     MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+    if (BIHandle != BI)
+      BI = BB->begin();
   }
   return MadeChange;
 }
@@ -632,6 +650,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
       Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
       Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
     }
+    // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+    Hash >>= 1;
     // If we've never seen this hash value before, it's a unique PHI.
     std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
       HashMap.insert(std::make_pair(Hash, PN));
@@ -753,3 +773,83 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
   return Align;
 }
 
+///===---------------------------------------------------------------------===//
+///  Dbg Intrinsic utilities
+///
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           StoreInst *SI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal = 
+    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0,
+                                    DIVar, SI);
+  
+  // Propagate any debug metadata from the store onto the dbg.value.
+  DebugLoc SIDL = SI->getDebugLoc();
+  if (!SIDL.isUnknown())
+    DbgVal->setDebugLoc(SIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           LoadInst *LI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal = 
+    Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+                                    DIVar, LI);
+  
+  // Propagate any debug metadata from the store onto the dbg.value.
+  DebugLoc LIDL = LI->getDebugLoc();
+  if (!LIDL.isUnknown())
+    DbgVal->setDebugLoc(LIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) {
+  DIBuilder DIB(*F.getParent());
+  SmallVector<DbgDeclareInst *, 4> Dbgs;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        Dbgs.push_back(DDI);
+    }
+  if (Dbgs.empty())
+    return false;
+
+  for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(),
+         E = Dbgs.end(); I != E; ++I) {
+    DbgDeclareInst *DDI = *I;
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+      bool RemoveDDI = true;
+      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+           UI != E; ++UI)
+        if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+          ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+        else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+          ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+        else
+          RemoveDDI = false;
+      if (RemoveDDI)
+        DDI->eraseFromParent();
+    }
+  }
+  return true;
+}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 2462630..f02ffd2 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -115,7 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo)
 INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
                 "Canonicalize natural loops", true, false)
 
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
 char &llvm::LoopSimplifyID = LoopSimplify::ID;
 Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
@@ -648,9 +648,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
   // the backedge block which correspond to any PHI nodes in the header block.
   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
     PHINode *PN = cast<PHINode>(I);
-    PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be",
-                                     BETerminator);
-    NewPN->reserveOperandSpace(BackedgeBlocks.size());
+    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+                                     PN->getName()+".be", BETerminator);
     if (AA) AA->copyValue(PN, NewPN);
 
     // Loop over the PHI node, moving all entries except the one for the
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 914a439..ed733d3 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -84,7 +84,7 @@ char LowerSwitch::ID = 0;
 INITIALIZE_PASS(LowerSwitch, "lowerswitch",
                 "Lower SwitchInst's to branches", false, false)
 
-// Publically exposed interface to pass...
+// Publicly exposed interface to pass...
 char &llvm::LowerSwitchID = LowerSwitch::ID;
 // createLowerSwitchPass - Interface to this file...
 FunctionPass *llvm::createLowerSwitchPass() {
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 7788857..50c9ae2 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -45,7 +46,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
 #include <algorithm>
-#include <map>
 #include <queue>
 using namespace llvm;
 
@@ -103,7 +103,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
 /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
 /// alloca 'V', if any.
 static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) {
-  if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1))
+  if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
     for (Value::use_iterator UI = DebugNode->use_begin(),
          E = DebugNode->use_end(); UI != E; ++UI)
       if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
@@ -273,8 +273,6 @@ namespace {
                                   LargeBlockInfo &LBI);
     void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                   LargeBlockInfo &LBI);
-    void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI);
-
     
     void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                     RenamePassData::ValVector &IncVals,
@@ -391,7 +389,9 @@ void PromoteMem2Reg::run() {
       if (Info.UsingBlocks.empty()) {
         // Record debuginfo for the store and remove the declaration's debuginfo.
         if (DbgDeclareInst *DDI = Info.DbgDeclare) {
-          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
+          if (!DIB)
+            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
+          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
           DDI->eraseFromParent();
         }
         // Remove the (now dead) store and alloca.
@@ -423,8 +423,11 @@ void PromoteMem2Reg::run() {
         while (!AI->use_empty()) {
           StoreInst *SI = cast<StoreInst>(AI->use_back());
           // Record debuginfo for the store before removing it.
-          if (DbgDeclareInst *DDI = Info.DbgDeclare)
-            ConvertDebugDeclareToDebugValue(DDI, SI);
+          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
+            if (!DIB)
+              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+          }
           SI->eraseFromParent();
           LBI.deleteValue(SI);
         }
@@ -944,28 +947,6 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
   }
 }
 
-// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
-// that has an associated llvm.dbg.decl intrinsic.
-void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
-                                                     StoreInst *SI) {
-  DIVariable DIVar(DDI->getVariable());
-  if (!DIVar.Verify())
-    return;
-
-  if (!DIB)
-    DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
-  Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
-                                                     DIVar, SI);
-  
-  // Propagate any debug metadata from the store onto the dbg.value.
-  DebugLoc SIDL = SI->getDebugLoc();
-  if (!SIDL.isUnknown())
-    DbgVal->setDebugLoc(SIDL);
-  // Otherwise propagate debug metadata from dbg.declare.
-  else
-    DbgVal->setDebugLoc(DDI->getDebugLoc());
-}
-
 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
 // Alloca returns true if there wasn't already a phi-node for that variable
 //
@@ -979,12 +960,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
 
   // Create a PhiNode using the dereferenced type... and add the phi-node to the
   // BasicBlock.
-  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(),
+  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                        Allocas[AllocaNo]->getName() + "." + Twine(Version++), 
                        BB->begin());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
-  PN->reserveOperandSpace(getNumPreds(BB));
 
   if (AST && PN->getType()->isPointerTy())
     AST->copyValue(PointerAllocaValues[AllocaNo], PN);
@@ -1076,8 +1056,11 @@ NextIteration:
       // what value were we writing?
       IncomingVals[ai->second] = SI->getOperand(0);
       // Record debuginfo for the store before removing it.
-      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
-        ConvertDebugDeclareToDebugValue(DDI, SI);
+      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
+        if (!DIB)
+          DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
+        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+      }
       BB->getInstList().erase(SI);
     }
   }
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 3896d98..2860c3e 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ssaupdater"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -20,8 +21,10 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+
 using namespace llvm;
 
 typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
@@ -170,8 +173,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
   }
 
   // Ok, we have no way out, insert a new one now.
-  PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
-  InsertedPHI->reserveOperandSpace(PredValues.size());
+  PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
+                                         ProtoName, &BB->front());
 
   // Fill in all the predecessors of the PHI.
   for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
@@ -184,6 +187,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
     return V;
   }
 
+  // Set DebugLoc.
+  InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB));
+
   // If the client wants to know about all new instructions, tell it.
   if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
 
@@ -289,9 +295,8 @@ public:
   /// Reserve space for the operands but do not fill them in yet.
   static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
                                SSAUpdater *Updater) {
-    PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
-                                   &BB->front());
-    PHI->reserveOperandSpace(NumPreds);
+    PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds,
+                                   Updater->ProtoName, &BB->front());
     return PHI;
   }
 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c670885..18b8573 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -37,6 +37,10 @@
 #include <map>
 using namespace llvm;
 
+static cl::opt<unsigned>
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
+   cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+
 static cl::opt<bool>
 DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
        cl::desc("Duplicate return instructions into unconditional branches"));
@@ -201,11 +205,20 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
 /// which works well enough for us.
 ///
 /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
-/// see if V (which must be an instruction) is cheap to compute and is
-/// non-trapping.  If both are true, the instruction is inserted into the set
-/// and true is returned.
+/// see if V (which must be an instruction) and its recursive operands
+/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// are non-trapping.  If both are true, the instruction is inserted into the
+/// set and true is returned.
+///
+/// The cost for most non-trapping instructions is defined as 1 except for
+/// Select whose cost is 2.
+///
+/// After this function returns, CostRemaining is decreased by the cost of
+/// V plus its non-dominating operands.  If that cost is greater than
+/// CostRemaining, false is returned and CostRemaining is undefined.
 static bool DominatesMergePoint(Value *V, BasicBlock *BB,
-                                SmallPtrSet<Instruction*, 4> *AggressiveInsts) {
+                                SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+                                unsigned &CostRemaining) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) {
     // Non-instructions all dominate instructions, but not all constantexprs
@@ -232,12 +245,17 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   // instructions in the 'if region'.
   if (AggressiveInsts == 0) return false;
   
+  // If we have seen this instruction before, don't count it again.
+  if (AggressiveInsts->count(I)) return true;
+
   // Okay, it looks like the instruction IS in the "condition".  Check to
   // see if it's a cheap instruction to unconditionally compute, and if it
   // only uses stuff defined outside of the condition.  If so, hoist it out.
   if (!I->isSafeToSpeculativelyExecute())
     return false;
 
+  unsigned Cost = 0;
+
   switch (I->getOpcode()) {
   default: return false;  // Cannot hoist this out safely.
   case Instruction::Load:
@@ -246,11 +264,13 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
     // predecessor.
     if (PBB->getFirstNonPHIOrDbg() != I)
       return false;
+    Cost = 1;
     break;
   case Instruction::GetElementPtr:
     // GEPs are cheap if all indices are constant.
     if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
       return false;
+    Cost = 1;
     break;
   case Instruction::Add:
   case Instruction::Sub:
@@ -261,13 +281,26 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   case Instruction::LShr:
   case Instruction::AShr:
   case Instruction::ICmp:
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+    Cost = 1;
     break;   // These are all cheap and non-trapping instructions.
+
+  case Instruction::Select:
+    Cost = 2;
+    break;
   }
 
-  // Okay, we can only really hoist these out if their operands are not
-  // defined in the conditional region.
+  if (Cost > CostRemaining)
+    return false;
+
+  CostRemaining -= Cost;
+
+  // Okay, we can only really hoist these out if their operands do
+  // not take us over the cost threshold.
   for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-    if (!DominatesMergePoint(*i, BB, 0))
+    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
       return false;
   // Okay, it's safe to do this!  Remember this instruction.
   AggressiveInsts->insert(I);
@@ -807,12 +840,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
   BasicBlock::iterator BB2_Itr = BB2->begin();
 
   Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
-  while (isa<DbgInfoIntrinsic>(I1))
-    I1 = BB1_Itr++;
-  while (isa<DbgInfoIntrinsic>(I2))
-    I2 = BB2_Itr++;
-  if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
-      !I1->isIdenticalToWhenDefined(I2) ||
+  // Skip debug info if it is not identical.
+  DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+  DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+  if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+    while (isa<DbgInfoIntrinsic>(I1))
+      I1 = BB1_Itr++;
+    while (isa<DbgInfoIntrinsic>(I2))
+      I2 = BB2_Itr++;
+  }
+  if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
       (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
     return false;
 
@@ -835,13 +872,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     I2->eraseFromParent();
 
     I1 = BB1_Itr++;
-    while (isa<DbgInfoIntrinsic>(I1))
-      I1 = BB1_Itr++;
     I2 = BB2_Itr++;
-    while (isa<DbgInfoIntrinsic>(I2))
-      I2 = BB2_Itr++;
-  } while (I1->getOpcode() == I2->getOpcode() &&
-           I1->isIdenticalToWhenDefined(I2));
+    // Skip debug info if it is not identical.
+    DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
+    DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
+    if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
+      while (isa<DbgInfoIntrinsic>(I1))
+        I1 = BB1_Itr++;
+      while (isa<DbgInfoIntrinsic>(I2))
+        I2 = BB2_Itr++;
+    }
+  } while (I1->isIdenticalToWhenDefined(I2));
 
   return true;
 
@@ -1209,6 +1250,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
   // instructions.  While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
   SmallPtrSet<Instruction*, 4> AggressiveInsts;
+  unsigned MaxCostVal0 = PHINodeFoldingThreshold,
+           MaxCostVal1 = PHINodeFoldingThreshold;
   
   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
     PHINode *PN = cast<PHINode>(II++);
@@ -1218,8 +1261,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
       continue;
     }
     
-    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
-        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+                             MaxCostVal0) ||
+        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+                             MaxCostVal1))
       return false;
   }
   
@@ -1393,24 +1438,23 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   return true;
 }
 
-/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
-/// and if a predecessor branches to us and one of our successors, fold the
-/// setcc into the predecessor and use logical operations to pick the right
-/// destination.
+/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
+/// predecessor branches to us and one of our successors, fold the block into
+/// the predecessor and use logical operations to pick the right destination.
 bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
     Cond->getParent() != BB || !Cond->hasOneUse())
   return false;
-  
+
   // Only allow this if the condition is a simple instruction that can be
   // executed unconditionally.  It must be in the same block as the branch, and
   // must be at the front of the block.
   BasicBlock::iterator FrontIt = BB->front();
+
   // Ignore dbg intrinsics.
-  while (isa<DbgInfoIntrinsic>(FrontIt))
-    ++FrontIt;
+  while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
     
   // Allow a single instruction to be hoisted in addition to the compare
   // that feeds the branch.  We later ensure that any values that _it_ uses
@@ -1422,21 +1466,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       FrontIt->isSafeToSpeculativelyExecute()) {
     BonusInst = &*FrontIt;
     ++FrontIt;
+    
+    // Ignore dbg intrinsics.
+    while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
   }
-  
+
   // Only a single bonus inst is allowed.
   if (&*FrontIt != Cond)
     return false;
   
   // Make sure the instruction after the condition is the cond branch.
   BasicBlock::iterator CondIt = Cond; ++CondIt;
+
   // Ingore dbg intrinsics.
-  while(isa<DbgInfoIntrinsic>(CondIt))
-    ++CondIt;
-  if (&*CondIt != BI) {
-    assert (!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!");
+  while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
+  
+  if (&*CondIt != BI)
     return false;
-  }
 
   // Cond is known to be a compare or binary operator.  Check to make sure that
   // neither operand is a potentially-trapping constant expression.
@@ -1447,13 +1493,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     if (CE->canTrap())
       return false;
   
-  
   // Finally, don't infinitely unroll conditional loops.
   BasicBlock *TrueDest  = BI->getSuccessor(0);
   BasicBlock *FalseDest = BI->getSuccessor(1);
   if (TrueDest == BB || FalseDest == BB)
     return false;
-  
+
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
     BasicBlock *PredBlock = *PI;
     BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
@@ -1461,10 +1506,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     // Check that we have two conditional branches.  If there is a PHI node in
     // the common successor, verify that the same value flows in from both
     // blocks.
-    if (PBI == 0 || PBI->isUnconditional() ||
-        !SafeToMergeTerminators(BI, PBI))
+    if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
       continue;
     
+    // Determine if the two branches share a common destination.
+    Instruction::BinaryOps Opc;
+    bool InvertPredCond = false;
+    
+    if (PBI->getSuccessor(0) == TrueDest)
+      Opc = Instruction::Or;
+    else if (PBI->getSuccessor(1) == FalseDest)
+      Opc = Instruction::And;
+    else if (PBI->getSuccessor(0) == FalseDest)
+      Opc = Instruction::And, InvertPredCond = true;
+    else if (PBI->getSuccessor(1) == TrueDest)
+      Opc = Instruction::Or, InvertPredCond = true;
+    else
+      continue;
+
     // Ensure that any values used in the bonus instruction are also used
     // by the terminator of the predecessor.  This means that those values
     // must already have been resolved, so we won't be inhibiting the 
@@ -1502,20 +1561,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       
       if (!UsedValues.empty()) return false;
     }
-    
-    Instruction::BinaryOps Opc;
-    bool InvertPredCond = false;
-
-    if (PBI->getSuccessor(0) == TrueDest)
-      Opc = Instruction::Or;
-    else if (PBI->getSuccessor(1) == FalseDest)
-      Opc = Instruction::And;
-    else if (PBI->getSuccessor(0) == FalseDest)
-      Opc = Instruction::And, InvertPredCond = true;
-    else if (PBI->getSuccessor(1) == TrueDest)
-      Opc = Instruction::Or, InvertPredCond = true;
-    else
-      continue;
 
     DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
     
@@ -1566,6 +1611,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       AddPredecessorToBlock(FalseDest, PredBlock, BB);
       PBI->setSuccessor(1, FalseDest);
     }
+
+    // Copy any debug value intrinsics into the end of PredBlock.
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (isa<DbgInfoIntrinsic>(*I))
+        I->clone()->insertBefore(PBI);
+      
     return true;
   }
   return false;
@@ -1598,13 +1649,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
     // in the constant and simplify the block result.  Subsequent passes of
     // simplifycfg will thread the block.
     if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+      pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
       PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
+                                       std::distance(PB, PE),
                                        BI->getCondition()->getName() + ".pr",
                                        BB->begin());
       // Okay, we're going to insert the PHI node.  Since PBI is not the only
       // predecessor, compute the PHI'd conditional value for all of the preds.
       // Any predecessor where the condition is not computable we keep symbolic.
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
         BasicBlock *P = *PI;
         if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
             PBI != BI && PBI->isConditional() &&
@@ -1800,6 +1853,26 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
   return true;
 }
 
+// SimplifySwitchOnSelect - Replaces
+//   (switch (select cond, X, Y)) on constant X, Y
+// with a branch - conditional if X and Y lead to distinct BBs,
+// unconditional otherwise.
+static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
+  // Check for constant integer values in the select.
+  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
+  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
+  if (!TrueVal || !FalseVal)
+    return false;
+
+  // Find the relevant condition and destinations.
+  Value *Condition = Select->getCondition();
+  BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
+  BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+
+  // Perform the actual simplification.
+  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+}
+
 // SimplifyIndirectBrOnSelect - Replaces
 //   (indirectbr (select cond, blockaddress(@fn, BlockA),
 //                             blockaddress(@fn, BlockB)))
@@ -2148,7 +2221,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       if (LI->isVolatile())
         break;
     
-    // Delete this instruction
+    // Delete this instruction (any uses are guaranteed to be dead)
+    if (!BBI->use_empty())
+      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
     BBI->eraseFromParent();
     Changed = true;
   }
@@ -2189,17 +2264,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       // If the default value is unreachable, figure out the most popular
       // destination and make it the default.
       if (SI->getSuccessor(0) == BB) {
-        std::map<BasicBlock*, unsigned> Popularity;
-        for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
-          Popularity[SI->getSuccessor(i)]++;
-        
+        std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
+        for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+          std::pair<unsigned, unsigned>& entry =
+              Popularity[SI->getSuccessor(i)];
+          if (entry.first == 0) {
+            entry.first = 1;
+            entry.second = i;
+          } else {
+            entry.first++;
+          }
+        }
+
         // Find the most popular block.
         unsigned MaxPop = 0;
+        unsigned MaxIndex = 0;
         BasicBlock *MaxBlock = 0;
-        for (std::map<BasicBlock*, unsigned>::iterator
+        for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
              I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
-          if (I->second > MaxPop) {
-            MaxPop = I->second;
+          if (I->second.first > MaxPop || 
+              (I->second.first == MaxPop && MaxIndex > I->second.second)) {
+            MaxPop = I->second.first;
+            MaxIndex = I->second.second;
             MaxBlock = I->first;
           }
         }
@@ -2309,7 +2395,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
   if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
     if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
       return SimplifyCFG(BB) | true;
-  
+
+  Value *Cond = SI->getCondition();
+  if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+    if (SimplifySwitchOnSelect(SI, Select))
+      return SimplifyCFG(BB) | true;
+
   // If the block only contains the switch, see if we can fold the block
   // away into any preds.
   BasicBlock::iterator BBI = BB->begin();
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index ccb8287..46d4ada 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -116,7 +116,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
     ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
   } else {
     // If the function doesn't return void... add a PHI node to the block...
-    PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");
+    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+                         "UnifiedRetVal");
     NewRetBlock->getInstList().push_back(PN);
     ReturnInst::Create(F.getContext(), PN, NewRetBlock);
   }
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index f5481d3..a73bf04 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -39,7 +39,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
       return VM[V] = const_cast<Value*>(V);
     
     // Create a dummy node in case we have a metadata cycle.
-    MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+    MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
     VM[V] = Dummy;
     
     // Check all operands to see if any need to be remapped.
@@ -54,7 +54,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
         Value *Op = MD->getOperand(i);
         Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
       }
-      MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+      MDNode *NewMD = MDNode::get(V->getContext(), Elts);
       Dummy->replaceAllUsesWith(NewMD);
       VM[V] = NewMD;
       MDNode::deleteTemporary(Dummy);
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index cbc874a..844284d 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -32,6 +32,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -39,9 +40,13 @@
 #include "llvm/Support/FormattedStream.h"
 #include <algorithm>
 #include <cctype>
-#include <map>
 using namespace llvm;
 
+static cl::opt<bool>
+EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden,
+                       cl::desc("Enable debug info comments"));
+
+
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
 
@@ -89,7 +94,7 @@ enum PrefixType {
 /// prefixed with % (if the string only contains simple characters) or is
 /// surrounded with ""'s (if it has special chars in it).  Print it out.
 static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
-  assert(Name.data() && "Cannot get empty name!");
+  assert(!Name.empty() && "Cannot get empty name!");
   switch (Prefix) {
   default: llvm_unreachable("Bad prefix!");
   case NoPrefix: break;
@@ -1075,7 +1080,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     }
 
     if (CE->hasIndices()) {
-      const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+      ArrayRef<unsigned> Indices = CE->getIndices();
       for (unsigned i = 0, e = Indices.size(); i != e; ++i)
         Out << ", " << Indices[i];
     }
@@ -1338,9 +1343,12 @@ void AssemblyWriter::printModule(const Module *M) {
       CurPos = NewLine+1;
       NewLine = Asm.find_first_of('\n', CurPos);
     }
-    Out << "module asm \"";
-    PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
-    Out << "\"\n";
+    std::string rest(Asm.begin()+CurPos, Asm.end());
+    if (!rest.empty()) {
+      Out << "module asm \"";
+      PrintEscapedString(rest, Out);
+      Out << "\"\n";
+    }
   }
 
   // Loop over the dependent libraries and emit them.
@@ -1581,8 +1589,8 @@ void AssemblyWriter::printFunction(const Function *F) {
   case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc "; break;
   case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
   case CallingConv::MSP430_INTR:  Out << "msp430_intrcc "; break;
-  case CallingConv::PTX_Kernel:   Out << "ptx_kernel"; break;
-  case CallingConv::PTX_Device:   Out << "ptx_device"; break;
+  case CallingConv::PTX_Kernel:   Out << "ptx_kernel "; break;
+  case CallingConv::PTX_Device:   Out << "ptx_device "; break;
   default: Out << "cc" << F->getCallingConv() << " "; break;
   }
 
@@ -1727,6 +1735,18 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
   if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
 }
 
+/// printDebugLoc - Print DebugLoc.
+static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
+  OS << DL.getLine() << ":" << DL.getCol();
+  if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
+    DebugLoc IDL = DebugLoc::getFromDILocation(N);
+    if (!IDL.isUnknown()) {
+      OS << "@";
+      printDebugLoc(IDL,OS);
+    }
+  }
+}
+
 /// printInfoComment - Print a little comment after the instruction indicating
 /// which slot it occupies.
 ///
@@ -1734,6 +1754,43 @@ void AssemblyWriter::printInfoComment(const Value &V) {
   if (AnnotationWriter) {
     AnnotationWriter->printInfoComment(V, Out);
     return;
+  } else if (EnableDebugInfoComment) {
+    bool Padded = false;
+    if (const Instruction *I = dyn_cast<Instruction>(&V)) {
+      const DebugLoc &DL = I->getDebugLoc();
+      if (!DL.isUnknown()) {
+        if (!Padded) {
+          Out.PadToColumn(50);
+          Padded = true;
+          Out << ";";
+        }
+        Out << " [debug line = ";
+        printDebugLoc(DL,Out);
+        Out << "]";
+      }
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+        const MDNode *Var = DDI->getVariable();
+        if (!Padded) {
+          Out.PadToColumn(50);
+          Padded = true;
+          Out << ";";
+        }
+        if (Var && Var->getNumOperands() >= 2)
+          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
+            Out << " [debug variable = " << MDS->getString() << "]";
+      }
+      else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+        const MDNode *Var = DVI->getVariable();
+        if (!Padded) {
+          Out.PadToColumn(50);
+          Padded = true;
+          Out << ";";
+        }
+        if (Var && Var->getNumOperands() >= 2)
+          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
+            Out << " [debug variable = " << MDS->getString() << "]";
+      }
+    }
   }
 }
 
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index b323540..4541f38 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -84,7 +84,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
             Name.compare(14, 5, "vsubl", 5) == 0 ||
             Name.compare(14, 5, "vaddw", 5) == 0 ||
             Name.compare(14, 5, "vsubw", 5) == 0 ||
-            Name.compare(14, 5, "vmull", 5) == 0 ||
             Name.compare(14, 5, "vmlal", 5) == 0 ||
             Name.compare(14, 5, "vmlsl", 5) == 0 ||
             Name.compare(14, 5, "vabdl", 5) == 0 ||
@@ -528,6 +527,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       // or 0.
       NewFn = 0;
       return true;           
+    } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
+               Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
+               Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
+      // Calls to these instructions are transformed into unaligned loads.
+      NewFn = 0;
+      return true;
     } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
       // This is an SSE/MMX instruction.
       const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
@@ -947,7 +952,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         
       // Remove upgraded instruction.
       CI->eraseFromParent();
-      
+    
+    } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+               F->getName() == "llvm.x86.sse2.loadu.dq" ||
+               F->getName() == "llvm.x86.sse2.loadu.pd") {
+      // Convert to a native, unaligned load.
+      const Type *VecTy = CI->getType();
+      const Type *IntTy = IntegerType::get(C, 128);
+      IRBuilder<> Builder(C);
+      Builder.SetInsertPoint(CI->getParent(), CI);
+
+      Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
+                                        PointerType::getUnqual(IntTy),
+                                        "cast");
+      LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
+      LI->setAlignment(1);      // Unaligned load.
+      BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
+
+      // Fix up all the uses with our new load.
+      if (!CI->use_empty())
+        CI->replaceAllUsesWith(BC);
+
+      // Remove intrinsic.
+      CI->eraseFromParent();
     } else {
       llvm_unreachable("Unknown function for CallInst upgrade.");
     }
@@ -1180,74 +1207,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     break;
   }
 
-#if 0
-  case Intrinsic::x86_mmx_cvtsi32_si64: {
-    // The return type needs to be changed.
-    Value *Operands[1];
-    Operands[0] = CI->getArgOperand(0);
-    ConstructNewCallInst(NewFn, CI, Operands, 1);
-    break;
-  }
-  case Intrinsic::x86_mmx_cvtsi64_si32: {
-    Value *Operands[1];
-
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 1);
-    break;
-  }
-  case Intrinsic::x86_mmx_vec_init_b:
-  case Intrinsic::x86_mmx_vec_init_w:
-  case Intrinsic::x86_mmx_vec_init_d: {
-    // The return type needs to be changed.
-    Value *Operands[8];
-    unsigned NumOps = 0;
-
-    switch (NewFn->getIntrinsicID()) {
-    default: break;
-    case Intrinsic::x86_mmx_vec_init_b: NumOps = 8; break;
-    case Intrinsic::x86_mmx_vec_init_w: NumOps = 4; break;
-    case Intrinsic::x86_mmx_vec_init_d: NumOps = 2; break;
-    }
-
-    switch (NewFn->getIntrinsicID()) {
-    default: break;
-    case Intrinsic::x86_mmx_vec_init_b:
-      Operands[7] = CI->getArgOperand(7);
-      Operands[6] = CI->getArgOperand(6);
-      Operands[5] = CI->getArgOperand(5);
-      Operands[4] = CI->getArgOperand(4);
-      // FALLTHRU
-    case Intrinsic::x86_mmx_vec_init_w:
-      Operands[3] = CI->getArgOperand(3);
-      Operands[2] = CI->getArgOperand(2);
-      // FALLTHRU
-    case Intrinsic::x86_mmx_vec_init_d:
-      Operands[1] = CI->getArgOperand(1);
-      Operands[0] = CI->getArgOperand(0);
-      break;
-    }
-
-    ConstructNewCallInst(NewFn, CI, Operands, NumOps);
-    break;
-  }
-  case Intrinsic::x86_mmx_vec_ext_d: {
-    Value *Operands[2];
-
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-#endif
-
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz: {
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 1abd031..6bde263 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMCore
   Constants.cpp
   Core.cpp
   DebugLoc.cpp
+  DebugInfoProbe.cpp
   Dominators.cpp
   Function.cpp
   GVMaterializer.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 573efb7..9985ada 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
+#include "llvm/Operator.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1735,7 +1736,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
             // with a single zero index, it must be nonzero.
             assert(CE1->getNumOperands() == 2 &&
                    !CE1->getOperand(1)->isNullValue() &&
-                   "Suprising getelementptr!");
+                   "Surprising getelementptr!");
             return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
           } else {
             // If they are different globals, we don't know what the value is,
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 246fde1..15d7793 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -32,7 +32,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include <algorithm>
-#include <map>
+#include <cstdarg>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -325,27 +325,53 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
   assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
 }
 
-ConstantInt* ConstantInt::getTrue(LLVMContext &Context) {
+ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
   LLVMContextImpl *pImpl = Context.pImpl;
   if (!pImpl->TheTrueVal)
     pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
   return pImpl->TheTrueVal;
 }
 
-ConstantInt* ConstantInt::getFalse(LLVMContext &Context) {
+ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
   LLVMContextImpl *pImpl = Context.pImpl;
   if (!pImpl->TheFalseVal)
     pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
   return pImpl->TheFalseVal;
 }
 
+Constant *ConstantInt::getTrue(const Type *Ty) {
+  const VectorType *VTy = dyn_cast<VectorType>(Ty);
+  if (!VTy) {
+    assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
+    return ConstantInt::getTrue(Ty->getContext());
+  }
+  assert(VTy->getElementType()->isIntegerTy(1) &&
+         "True must be vector of i1 or i1.");
+  SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+                                   ConstantInt::getTrue(Ty->getContext()));
+  return ConstantVector::get(Splat);
+}
+
+Constant *ConstantInt::getFalse(const Type *Ty) {
+  const VectorType *VTy = dyn_cast<VectorType>(Ty);
+  if (!VTy) {
+    assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
+    return ConstantInt::getFalse(Ty->getContext());
+  }
+  assert(VTy->getElementType()->isIntegerTy(1) &&
+         "False must be vector of i1 or i1.");
+  SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
+                                   ConstantInt::getFalse(Ty->getContext()));
+  return ConstantVector::get(Splat);
+}
+
 
 // Get a ConstantInt from an APInt. Note that the value stored in the DenseMap 
 // as the key, is a DenseMapAPIntKeyInfo::KeyTy which has provided the
 // operator== and operator!= to ensure that the DenseMap doesn't attempt to
 // compare APInt's of different widths, which would violate an APInt class
 // invariant which generates an assertion.
-ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) {
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
   // Get the corresponding integer type for the bit width of the value.
   const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
   // get an existing value or the insertion position
@@ -355,9 +381,8 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) {
   return Slot;
 }
 
-Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
-  Constant *C = get(cast<IntegerType>(Ty->getScalarType()),
-                               V, isSigned);
+Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+  Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
 
   // For vectors, broadcast the value.
   if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -596,8 +621,6 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
   return get(ATy, ElementVals);
 }
 
-
-
 ConstantStruct::ConstantStruct(const StructType *T,
                                const std::vector<Constant*> &V)
   : Constant(T, ConstantStructVal,
@@ -644,6 +667,19 @@ Constant *ConstantStruct::get(LLVMContext &Context,
   return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
 }
 
+Constant* ConstantStruct::get(LLVMContext &Context, bool Packed,
+                              Constant * Val, ...) {
+  va_list ap;
+  std::vector<Constant*> Values;
+  va_start(ap, Val);
+  while (Val) {
+    Values.push_back(Val);
+    Val = va_arg(ap, llvm::Constant*);
+  }
+  va_end(ap);
+  return get(Context, Values, Packed);
+}
+
 ConstantVector::ConstantVector(const VectorType *T,
                                const std::vector<Constant*> &V)
   : Constant(T, ConstantVectorVal,
@@ -734,7 +770,7 @@ bool ConstantExpr::hasIndices() const {
          getOpcode() == Instruction::InsertValue;
 }
 
-const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const {
+ArrayRef<unsigned> ConstantExpr::getIndices() const {
   if (const ExtractValueConstantExpr *EVCE =
         dyn_cast<ExtractValueConstantExpr>(this))
     return EVCE->Indices;
@@ -818,10 +854,10 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
 /// operands replaced with the specified values.  The specified operands must
 /// match count and type with the existing ones.
 Constant *ConstantExpr::
-getWithOperands(Constant *const *Ops, unsigned NumOps) const {
-  assert(NumOps == getNumOperands() && "Operand count mismatch!");
+getWithOperands(ArrayRef<Constant*> Ops) const {
+  assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
   bool AnyChange = false;
-  for (unsigned i = 0; i != NumOps; ++i) {
+  for (unsigned i = 0; i != Ops.size(); ++i) {
     assert(Ops[i]->getType() == getOperand(i)->getType() &&
            "Operand type mismatch!");
     AnyChange |= Ops[i] != getOperand(i);
@@ -853,8 +889,8 @@ getWithOperands(Constant *const *Ops, unsigned NumOps) const {
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
   case Instruction::GetElementPtr:
     return cast<GEPOperator>(this)->isInBounds() ?
-      ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], NumOps-1) :
-      ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1);
+      ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) :
+      ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1);
   case Instruction::ICmp:
   case Instruction::FCmp:
     return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
@@ -2114,7 +2150,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
     Constant *Agg = getOperand(0);
     if (Agg == From) Agg = To;
     
-    const SmallVector<unsigned, 4> &Indices = getIndices();
+    ArrayRef<unsigned> Indices = getIndices();
     Replacement = ConstantExpr::getExtractValue(Agg,
                                                 &Indices[0], Indices.size());
   } else if (getOpcode() == Instruction::InsertValue) {
@@ -2123,7 +2159,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
     if (Agg == From) Agg = To;
     if (Val == From) Val = To;
     
-    const SmallVector<unsigned, 4> &Indices = getIndices();
+    ArrayRef<unsigned> Indices = getIndices();
     Replacement = ConstantExpr::getInsertValue(Agg, Val,
                                                &Indices[0], Indices.size());
   } else if (isCast()) {
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index ffc673f..1395754 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -301,20 +301,18 @@ struct OperandTraits<CompareConstantExpr> :
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
 
 struct ExprMapKeyType {
-  typedef SmallVector<unsigned, 4> IndexList;
-
   ExprMapKeyType(unsigned opc,
-      const std::vector<Constant*> &ops,
+      ArrayRef<Constant*> ops,
       unsigned short flags = 0,
       unsigned short optionalflags = 0,
-      const IndexList &inds = IndexList())
+      ArrayRef<unsigned> inds = ArrayRef<unsigned>())
         : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
-        operands(ops), indices(inds) {}
+        operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
   uint8_t opcode;
   uint8_t subclassoptionaldata;
   uint16_t subclassdata;
   std::vector<Constant*> operands;
-  IndexList indices;
+  SmallVector<unsigned, 4> indices;
   bool operator==(const ExprMapKeyType& that) const {
     return this->opcode == that.opcode &&
            this->subclassdata == that.subclassdata &&
@@ -465,7 +463,7 @@ struct ConstantKeyData<ConstantExpr> {
         CE->isCompare() ? CE->getPredicate() : 0,
         CE->getRawSubclassOptionalData(),
         CE->hasIndices() ?
-          CE->getIndices() : SmallVector<unsigned, 4>());
+          CE->getIndices() : ArrayRef<unsigned>());
   }
 };
 
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 35c3a2e..92f9440 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -335,7 +335,7 @@ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
 
 void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
   StructType *Ty = unwrap<StructType>(StructTy);
-  for (FunctionType::param_iterator I = Ty->element_begin(),
+  for (StructType::element_iterator I = Ty->element_begin(),
                                     E = Ty->element_end(); I != E; ++I)
     *Dest++ = wrap(*I);
 }
@@ -543,7 +543,8 @@ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
 
 LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
                                  unsigned Count) {
-  return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count));
+  return wrap(MDNode::get(*unwrap(C),
+                          ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count)));
 }
 
 LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
@@ -2082,7 +2083,7 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
 /*--.. Miscellaneous instructions ..........................................--*/
 
 LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
-  return wrap(unwrap(B)->CreatePHI(unwrap(Ty), Name));
+  return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
 }
 
 LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp
new file mode 100644
index 0000000..334c3d8
--- /dev/null
+++ b/lib/VMCore/DebugInfoProbe.cpp
@@ -0,0 +1,258 @@
+//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DebugInfoProbe. This probe can be used by a pass
+// manager to analyze how optimizer is treating debugging information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "debuginfoprobe"
+#include "llvm/DebugInfoProbe.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include <set>
+#include <string>
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden,
+                     cl::desc("Enable debug info probe"));
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl - This class implements a interface to monitor
+// how an optimization pass is preserving debugging information.
+
+namespace llvm {
+
+  class DebugInfoProbeImpl {
+  public:
+    DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {}
+    void initialize(StringRef PName, Function &F);
+    void finalize(Function &F);
+    void report();
+  private:
+    unsigned NumDbgLineLost, NumDbgValueLost;
+    std::string PassName;
+    Function *TheFn;
+    std::set<unsigned> LineNos;
+    std::set<MDNode *> DbgVariables;
+    std::set<Instruction *> MissingDebugLoc;
+  };
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeImpl
+
+static void collect(Function &F, std::set<unsigned> &Lines) {
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 
+         BI != BE; ++BI) {
+      const DebugLoc &DL = BI->getDebugLoc();
+      unsigned LineNo = 0;
+      if (!DL.isUnknown()) {
+        if (MDNode *N = DL.getInlinedAt(F.getContext()))
+          LineNo = DebugLoc::getFromDILocation(N).getLine();
+        else
+          LineNo = DL.getLine();
+
+        Lines.insert(LineNo);
+      }
+    }
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) {
+  if (!EnableDebugInfoProbe) return;
+  PassName = PName;
+
+  LineNos.clear();
+  DbgVariables.clear();
+  TheFn = &F;
+  collect(F, LineNos);
+
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 
+         BI != BE; ++BI) {
+      if (BI->getDebugLoc().isUnknown())
+        MissingDebugLoc.insert(BI);
+      if (!isa<DbgInfoIntrinsic>(BI)) continue;
+      Value *Addr = NULL;
+      MDNode *Node = NULL;
+      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+        Addr = DDI->getAddress();
+        Node = DDI->getVariable();
+      } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+        Addr = DVI->getValue();
+        Node = DVI->getVariable();
+      }
+      if (Addr)
+        DbgVariables.insert(Node);
+    }
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbeImpl::report() {
+  if (!EnableDebugInfoProbe) return;
+  if (NumDbgLineLost || NumDbgValueLost) {
+    raw_ostream *OutStream = CreateInfoOutputFile();
+    if (NumDbgLineLost)
+      *OutStream << NumDbgLineLost
+                 << "\t times line number info lost by "
+                 << PassName << "\n";
+    if (NumDbgValueLost)
+      *OutStream << NumDbgValueLost
+                 << "\t times variable info lost by    "
+                 << PassName << "\n";
+    delete OutStream;
+  }
+  NumDbgLineLost = 0;
+  NumDbgValueLost = 0;
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeImpl::finalize(Function &F) {
+  if (!EnableDebugInfoProbe) return;
+  std::set<unsigned> LineNos2;
+  collect(F, LineNos2);
+  assert (TheFn == &F && "Invalid function to measure!");
+
+  for (std::set<unsigned>::iterator I = LineNos.begin(),
+         E = LineNos.end(); I != E; ++I) {
+    unsigned LineNo = *I;
+    if (LineNos2.count(LineNo) == 0) {
+      DEBUG(dbgs() 
+            << "DebugInfoProbe("
+            << PassName
+            << "): Losing dbg info for source line " 
+            << LineNo << "\n");
+      ++NumDbgLineLost;
+    }
+  }
+
+  std::set<MDNode *>DbgVariables2;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); 
+         BI != BE; ++BI) {
+      if (BI->getDebugLoc().isUnknown() &&
+          MissingDebugLoc.count(BI) == 0) {
+        DEBUG(dbgs() << "DebugInfoProbe(" << PassName << "): --- ");
+        DEBUG(BI->print(dbgs()));
+        DEBUG(dbgs() << "\n");
+      }
+      if (!isa<DbgInfoIntrinsic>(BI)) continue;
+      Value *Addr = NULL;
+      MDNode *Node = NULL;
+      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
+        Addr = DDI->getAddress();
+        Node = DDI->getVariable();
+      } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
+        Addr = DVI->getValue();
+        Node = DVI->getVariable();
+      }
+      if (Addr)
+        DbgVariables2.insert(Node);
+    }
+
+  for (std::set<MDNode *>::iterator I = DbgVariables.begin(), 
+         E = DbgVariables.end(); I != E; ++I) {
+    if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) {
+      DEBUG(dbgs() 
+            << "DebugInfoProbe("
+            << PassName
+            << "): Losing dbg info for variable: ";
+            if (MDString *MDS = dyn_cast_or_null<MDString>(
+                (*I)->getOperand(2)))
+              dbgs() << MDS->getString();
+            else
+              dbgs() << "...";
+            dbgs() << "\n");
+      ++NumDbgValueLost;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+DebugInfoProbe::DebugInfoProbe() {
+  pImpl = new DebugInfoProbeImpl();
+}
+
+DebugInfoProbe::~DebugInfoProbe() {
+  delete pImpl;
+}
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbe::initialize(StringRef PName, Function &F) {
+  pImpl->initialize(PName, F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbe::finalize(Function &F) {
+  pImpl->finalize(F);
+}
+
+/// report - Report findings. This should be invoked after finalize.
+void DebugInfoProbe::report() {
+  pImpl->report();
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoProbeInfo
+
+/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
+/// them.
+DebugInfoProbeInfo::~DebugInfoProbeInfo() {
+  if (!EnableDebugInfoProbe) return;
+    for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(),
+           E = Probes.end(); I != E; ++I) {
+      I->second->report();
+      delete I->second;
+    }
+  }
+
+/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeInfo::initialize(Pass *P, Function &F) {
+  if (!EnableDebugInfoProbe) return;
+  if (P->getAsPMDataManager())
+    return;
+
+  StringMapEntry<DebugInfoProbe *> &Entry =
+    Probes.GetOrCreateValue(P->getPassName());
+  DebugInfoProbe *&Probe = Entry.getValue();
+  if (!Probe)
+    Probe = new DebugInfoProbe();
+  Probe->initialize(P->getPassName(), F);
+}
+
+/// finalize - Collect information after running an optimization pass. This
+/// must be used after initialization.
+void DebugInfoProbeInfo::finalize(Pass *P, Function &F) {
+  if (!EnableDebugInfoProbe) return;
+  if (P->getAsPMDataManager())
+    return;
+  StringMapEntry<DebugInfoProbe *> &Entry =
+    Probes.GetOrCreateValue(P->getPassName());
+  DebugInfoProbe *&Probe = Entry.getValue();
+  assert (Probe && "DebugInfoProbe is not initialized!");
+  Probe->finalize(F);
+}
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index f8b45ee..520333c 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/DebugLoc.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "LLVMContextImpl.h"
 using namespace llvm;
 
@@ -108,7 +109,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
     ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
     Scope, IA
   };
-  return MDNode::get(Ctx2, &Elts[0], 4);
+  return MDNode::get(Ctx2, Elts);
 }
 
 /// getFromDILocation - Translate the DILocation quad into a DebugLoc.
@@ -128,6 +129,29 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
 }
 
 //===----------------------------------------------------------------------===//
+// DenseMap specialization
+//===----------------------------------------------------------------------===//
+
+DebugLoc DenseMapInfo<DebugLoc>::getEmptyKey() {
+  return DebugLoc::getEmptyKey();
+}
+
+DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() {
+  return DebugLoc::getTombstoneKey();
+}
+
+unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
+  FoldingSetNodeID ID;
+  ID.AddInteger(Key.LineCol);
+  ID.AddInteger(Key.ScopeIdx);
+  return ID.ComputeHash();
+}
+
+bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) {
+  return LHS == RHS;
+}
+
+//===----------------------------------------------------------------------===//
 // LLVMContextImpl Implementation
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index c374b06..08b845e 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -68,9 +68,8 @@ void DominatorTree::verifyAnalysis() const {
   DominatorTree OtherDT;
   OtherDT.getBase().recalculate(F);
   if (compare(OtherDT)) {
-    errs() << "DominatorTree is not up to date!  Computed:\n";
+    errs() << "DominatorTree is not up to date!\nComputed:\n";
     print(errs());
-    
     errs() << "\nActual:\n";
     OtherDT.print(errs());
     abort();
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 00d1d78..013c458 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -328,7 +328,7 @@ unsigned Function::getIntrinsicID() const {
 
 std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { 
   assert(id < num_intrinsics && "Invalid intrinsic ID!");
-  const char * const Table[] = {
+  static const char * const Table[] = {
     "not_intrinsic",
 #define GET_INTRINSIC_NAME_TABLE
 #include "llvm/Intrinsics.gen"
@@ -363,7 +363,7 @@ const FunctionType *Intrinsic::getType(LLVMContext &Context,
 }
 
 bool Intrinsic::isOverloaded(ID id) {
-  const bool OTable[] = {
+  static const bool OTable[] = {
     false,
 #define GET_INTRINSIC_OVERLOAD_TABLE
 #include "llvm/Intrinsics.gen"
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 595dea4..2149155 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -23,13 +23,14 @@ using namespace llvm;
 /// has array of i8 type filled in with the nul terminated string value
 /// specified.  If Name is specified, it is the name of the global variable
 /// created.
-Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) {
+Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
   Constant *StrConstant = ConstantArray::get(Context, Str, true);
   Module &M = *BB->getParent()->getParent();
   GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
                                           true, GlobalValue::InternalLinkage,
                                           StrConstant, "", 0, false);
   GV->setName(Name);
+  GV->setUnnamedAddr(true);
   return GV;
 }
 
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index d129028..61da9b6 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -131,26 +131,15 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
   return Removed;
 }
 
-/// resizeOperands - resize operands - This adjusts the length of the operands
-/// list according to the following behavior:
-///   1. If NumOps == 0, grow the operand list in response to a push_back style
-///      of operation.  This grows the number of ops by 1.5 times.
-///   2. If NumOps > NumOperands, reserve space for NumOps operands.
-///   3. If NumOps == NumOperands, trim the reserved space.
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation.  This grows the number of ops by 1.5
+/// times.
 ///
-void PHINode::resizeOperands(unsigned NumOps) {
+void PHINode::growOperands() {
   unsigned e = getNumOperands();
-  if (NumOps == 0) {
-    NumOps = e*3/2;
-    if (NumOps < 4) NumOps = 4;      // 4 op PHI nodes are VERY common.
-  } else if (NumOps*2 > NumOperands) {
-    // No resize needed.
-    if (ReservedSpace >= NumOps) return;
-  } else if (NumOps == NumOperands) {
-    if (ReservedSpace == NumOps) return;
-  } else {
-    return;
-  }
+  // Multiply by 1.5 and round down so the result is still even.
+  unsigned NumOps = e + e / 4 * 2;
+  if (NumOps < 4) NumOps = 4;      // 4 op PHI nodes are VERY common.
 
   ReservedSpace = NumOps;
   Use *OldOps = OperandList;
@@ -2297,8 +2286,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
     if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) {
                                                 // Casting from vector
       return DestPTy->getBitWidth() == SrcPTy->getBitWidth();
-    } else {                                    // Casting from something else
-      return DestPTy->getBitWidth() == SrcBits;
+    } else if (DestPTy->getBitWidth() == SrcBits) {
+      return true;                              // float/int -> vector
+    } else if (SrcTy->isX86_MMXTy()) {
+      return DestPTy->getBitWidth() == 64;      // MMX to 64-bit vector
+    } else {
+      return false;
     }
   } else if (DestTy->isPointerTy()) {        // Casting to pointer
     if (SrcTy->isPointerTy()) {              // Casting from pointer
@@ -2308,8 +2301,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
     } else {                                    // Casting from something else
       return false;
     }
-  } else if (DestTy->isX86_MMXTy()) {     
-    return SrcBits == 64;
+  } else if (DestTy->isX86_MMXTy()) {
+    if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) {
+      return SrcPTy->getBitWidth() == 64;       // 64-bit vector to MMX
+    } else {
+      return false;
+    }
   } else {                                      // Casting to something else
     return false;
   }
@@ -2990,7 +2987,7 @@ SwitchInst::~SwitchInst() {
 void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
   unsigned OpNo = NumOperands;
   if (OpNo+2 > ReservedSpace)
-    resizeOperands(0);  // Get more space!
+    growOperands();  // Get more space!
   // Initialize some new operands.
   assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
   NumOperands = OpNo+2;
@@ -3021,25 +3018,12 @@ void SwitchInst::removeCase(unsigned idx) {
   NumOperands = NumOps-2;
 }
 
-/// resizeOperands - resize operands - This adjusts the length of the operands
-/// list according to the following behavior:
-///   1. If NumOps == 0, grow the operand list in response to a push_back style
-///      of operation.  This grows the number of ops by 3 times.
-///   2. If NumOps > NumOperands, reserve space for NumOps operands.
-///   3. If NumOps == NumOperands, trim the reserved space.
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation.  This grows the number of ops by 3 times.
 ///
-void SwitchInst::resizeOperands(unsigned NumOps) {
+void SwitchInst::growOperands() {
   unsigned e = getNumOperands();
-  if (NumOps == 0) {
-    NumOps = e*3;
-  } else if (NumOps*2 > NumOperands) {
-    // No resize needed.
-    if (ReservedSpace >= NumOps) return;
-  } else if (NumOps == NumOperands) {
-    if (ReservedSpace == NumOps) return;
-  } else {
-    return;
-  }
+  unsigned NumOps = e*3;
 
   ReservedSpace = NumOps;
   Use *NewOps = allocHungoffUses(NumOps);
@@ -3077,25 +3061,12 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) {
 }
 
 
-/// resizeOperands - resize operands - This adjusts the length of the operands
-/// list according to the following behavior:
-///   1. If NumOps == 0, grow the operand list in response to a push_back style
-///      of operation.  This grows the number of ops by 2 times.
-///   2. If NumOps > NumOperands, reserve space for NumOps operands.
-///   3. If NumOps == NumOperands, trim the reserved space.
+/// growOperands - grow operands - This grows the operand list in response
+/// to a push_back style of operation.  This grows the number of ops by 2 times.
 ///
-void IndirectBrInst::resizeOperands(unsigned NumOps) {
+void IndirectBrInst::growOperands() {
   unsigned e = getNumOperands();
-  if (NumOps == 0) {
-    NumOps = e*2;
-  } else if (NumOps*2 > NumOperands) {
-    // No resize needed.
-    if (ReservedSpace >= NumOps) return;
-  } else if (NumOps == NumOperands) {
-    if (ReservedSpace == NumOps) return;
-  } else {
-    return;
-  }
+  unsigned NumOps = e*2;
   
   ReservedSpace = NumOps;
   Use *NewOps = allocHungoffUses(NumOps);
@@ -3139,7 +3110,7 @@ IndirectBrInst::~IndirectBrInst() {
 void IndirectBrInst::addDestination(BasicBlock *DestBB) {
   unsigned OpNo = NumOperands;
   if (OpNo+1 > ReservedSpace)
-    resizeOperands(0);  // Get more space!
+    growOperands();  // Get more space!
   // Initialize some new operands.
   assert(OpNo < ReservedSpace && "Growing didn't work!");
   NumOperands = OpNo+1;
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 23971aa..6ea4b48 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -184,7 +184,7 @@ public:
 
   // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
   // for types as they are needed.  Because resolution of types must invalidate
-  // all of the abstract type descriptions, we keep them in a seperate map to 
+  // all of the abstract type descriptions, we keep them in a separate map to
   // make this easy.
   TypePrinting ConcreteTypeDescriptions;
   TypePrinting AbstractTypeDescriptions;
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 0b8e8df..eb719e5 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -84,18 +84,18 @@ static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
   return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
 }
 
-MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
-               bool isFunctionLocal)
+MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
 : Value(Type::getMetadataTy(C), Value::MDNodeVal) {
-  NumOperands = NumVals;
+  NumOperands = Vals.size();
 
   if (isFunctionLocal)
     setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
 
   // Initialize the operand list, which is co-allocated on the end of the node.
+  unsigned i = 0;
   for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
-       Op != E; ++Op, ++Vals)
-    new (Op) MDNodeOperand(*Vals, this);
+       Op != E; ++Op, ++i)
+    new (Op) MDNodeOperand(Vals[i], this);
 }
 
 
@@ -183,9 +183,8 @@ static bool isFunctionLocalValue(Value *V) {
          (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
 }
 
-MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
-                          unsigned NumVals, FunctionLocalness FL,
-                          bool Insert) {
+MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
+                          FunctionLocalness FL, bool Insert) {
   LLVMContextImpl *pImpl = Context.pImpl;
 
   // Add all the operand pointers. Note that we don't have to add the
@@ -193,7 +192,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
   // Note that if the operands are later nulled out, the node will be
   // removed from the uniquing map.
   FoldingSetNodeID ID;
-  for (unsigned i = 0; i != NumVals; ++i)
+  for (unsigned i = 0; i != Vals.size(); ++i)
     ID.AddPointer(Vals[i]);
 
   void *InsertPoint;
@@ -205,7 +204,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
   bool isFunctionLocal = false;
   switch (FL) {
   case FL_Unknown:
-    for (unsigned i = 0; i != NumVals; ++i) {
+    for (unsigned i = 0; i != Vals.size(); ++i) {
       Value *V = Vals[i];
       if (!V) continue;
       if (isFunctionLocalValue(V)) {
@@ -223,8 +222,8 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
   }
 
   // Coallocate space for the node and Operands together, then placement new.
-  void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
-  N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
+  void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+  N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
 
   // InsertPoint will have been set by the FindNodeOrInsertPos call.
   pImpl->MDNodeSet.InsertNode(N, InsertPoint);
@@ -232,24 +231,24 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
   return N;
 }
 
-MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) {
-  return getMDNode(Context, Vals, NumVals, FL_Unknown);
+MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
+  return getMDNode(Context, Vals, FL_Unknown);
 }
 
-MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals,
-                                      unsigned NumVals, bool isFunctionLocal) {
-  return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No);
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
+                                      ArrayRef<Value*> Vals,
+                                      bool isFunctionLocal) {
+  return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
 }
 
-MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals,
-                            unsigned NumVals) {
-  return getMDNode(Context, Vals, NumVals, FL_Unknown, false);
+MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
+  return getMDNode(Context, Vals, FL_Unknown, false);
 }
 
-MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals,
-                             unsigned NumVals) {
-  MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
-  N = new (N) MDNode(Context, Vals, NumVals, FL_No);
+MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
+  MDNode *N =
+    (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
+  N = new (N) MDNode(Context, Vals, FL_No);
   N->setValueSubclassData(N->getSubclassDataFromValue() |
                           NotUniquedBit);
   LeakDetector::addGarbageObject(N);
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 8bfef98..637fa79 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/PassManagers.h"
 #include "llvm/PassManager.h"
+#include "llvm/DebugInfoProbe.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CommandLine.h"
@@ -25,6 +26,7 @@
 #include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringMap.h"
 #include <algorithm>
 #include <cstdio>
 #include <map>
@@ -63,11 +65,13 @@ PassOptionList;
 // Print IR out before/after specified passes.
 static PassOptionList
 PrintBefore("print-before",
-            llvm::cl::desc("Print IR before specified passes"));
+            llvm::cl::desc("Print IR before specified passes"),
+            cl::Hidden);
 
 static PassOptionList
 PrintAfter("print-after",
-           llvm::cl::desc("Print IR after specified passes"));
+           llvm::cl::desc("Print IR after specified passes"),
+           cl::Hidden);
 
 static cl::opt<bool>
 PrintBeforeAll("print-before-all",
@@ -440,6 +444,20 @@ char PassManagerImpl::ID = 0;
 namespace {
 
 //===----------------------------------------------------------------------===//
+// DebugInfoProbe
+
+static DebugInfoProbeInfo *TheDebugProbe;
+static void createDebugInfoProbe() {
+  if (TheDebugProbe) return;
+      
+  // Constructed the first time this is called. This guarantees that the 
+  // object will be constructed, if -enable-debug-info-probe is set, 
+  // before static globals, thus it will be destroyed before them.
+  static ManagedStatic<DebugInfoProbeInfo> DIP;
+  TheDebugProbe = &*DIP;
+}
+
+//===----------------------------------------------------------------------===//
 /// TimingInfo Class - This class is used to calculate information about the
 /// amount of time each pass takes to execute.  This only happens when
 /// -time-passes is enabled on the command line.
@@ -964,7 +982,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
       // Keep track of higher level analysis used by this manager.
       HigherLevelAnalysis.push_back(PRequired);
     } else
-      llvm_unreachable("Unable to accomodate Required Pass");
+      llvm_unreachable("Unable to accommodate Required Pass");
   }
 
   // Set P as P's last user until someone starts using P.
@@ -1428,6 +1446,7 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
 bool FunctionPassManagerImpl::run(Function &F) {
   bool Changed = false;
   TimingInfo::createTheTimeInfo();
+  createDebugInfoProbe();
 
   initializeAllAnalysisInfo();
   for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
@@ -1475,13 +1494,16 @@ bool FPPassManager::runOnFunction(Function &F) {
     dumpRequiredSet(FP);
 
     initializeAnalysisImpl(FP);
-
+    if (TheDebugProbe)
+      TheDebugProbe->initialize(FP, F);
     {
       PassManagerPrettyStackEntry X(FP, F);
       TimeRegion PassTimer(getPassTimer(FP));
 
       LocalChanged |= FP->runOnFunction(F);
     }
+    if (TheDebugProbe)
+      TheDebugProbe->finalize(FP, F);
 
     Changed |= LocalChanged;
     if (LocalChanged)
@@ -1629,6 +1651,7 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
 bool PassManagerImpl::run(Module &M) {
   bool Changed = false;
   TimingInfo::createTheTimeInfo();
+  createDebugInfoProbe();
 
   dumpArguments();
   dumpPasses();
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
index c97a170..fa92620 100644
--- a/lib/VMCore/PassRegistry.cpp
+++ b/lib/VMCore/PassRegistry.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 
 // FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
 // Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful ressurection after 
+// llvm_shutdown clear this map prevents successful resurrection after
 // llvm_shutdown is run.  Ideally we should find a solution so that we don't
 // leak the map, AND can still resurrect after shutdown.
 static ManagedStatic<PassRegistry> PassRegistryObj;
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index be28ad1..b15304c 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Assembly/Writer.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SCCIterator.h"
@@ -460,7 +461,7 @@ bool FunctionType::isValidArgumentType(const Type *ArgTy) {
 }
 
 FunctionType::FunctionType(const Type *Result,
-                           const std::vector<const Type*> &Params,
+                           ArrayRef<const Type*> Params,
                            bool IsVarArgs)
   : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) {
   ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
@@ -483,7 +484,7 @@ FunctionType::FunctionType(const Type *Result,
 }
 
 StructType::StructType(LLVMContext &C, 
-                       const std::vector<const Type*> &Types, bool isPacked)
+                       ArrayRef<const Type*> Types, bool isPacked)
   : CompositeType(C, StructTyID) {
   ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
   NumContainedTys = Types.size();
@@ -838,7 +839,7 @@ FunctionValType FunctionValType::get(const FunctionType *FT) {
 
 // FunctionType::get - The factory function for the FunctionType class...
 FunctionType *FunctionType::get(const Type *ReturnType,
-                                const std::vector<const Type*> &Params,
+                                ArrayRef<const Type*> Params,
                                 bool isVarArg) {
   FunctionValType VT(ReturnType, Params, isVarArg);
   FunctionType *FT = 0;
@@ -915,7 +916,7 @@ bool VectorType::isValidElementType(const Type *ElemTy) {
 //
 
 StructType *StructType::get(LLVMContext &Context,
-                            const std::vector<const Type*> &ETypes, 
+                            ArrayRef<const Type*> ETypes, 
                             bool isPacked) {
   StructValType STV(ETypes, isPacked);
   StructType *ST = 0;
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 4694486..ad09478 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TYPESCONTEXT_H
 #define LLVM_TYPESCONTEXT_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include <map>
 
@@ -157,8 +158,8 @@ class StructValType {
   std::vector<const Type*> ElTypes;
   bool packed;
 public:
-  StructValType(const std::vector<const Type*> &args, bool isPacked)
-    : ElTypes(args), packed(isPacked) {}
+  StructValType(ArrayRef<const Type*> args, bool isPacked)
+    : ElTypes(args.vec()), packed(isPacked) {}
 
   static StructValType get(const StructType *ST) {
     std::vector<const Type *> ElTypes;
@@ -187,8 +188,8 @@ class FunctionValType {
   std::vector<const Type*> ArgTypes;
   bool isVarArg;
 public:
-  FunctionValType(const Type *ret, const std::vector<const Type*> &args,
-                  bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {}
+  FunctionValType(const Type *ret, ArrayRef<const Type*> args, bool isVA)
+    : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {}
 
   static FunctionValType get(const FunctionType *FT);
 
@@ -369,7 +370,7 @@ public:
 
         // Remove the old entry form TypesByHash.  If the hash values differ
         // now, remove it from the old place.  Otherwise, continue scanning
-        // withing this hashcode to reduce work.
+        // within this hashcode to reduce work.
         if (NewTypeHash != OldTypeHash) {
           RemoveFromTypesByHash(OldTypeHash, Ty);
         } else {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 58ec6fe..8b89110 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -471,6 +471,23 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
             "invalid linkage type for global declaration", &GV);
   }
 
+  if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+                       GV.getName() == "llvm.global_dtors")) {
+    Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+            "invalid linkage for intrinsic global variable", &GV);
+    // Don't worry about emitting an error for it not being an array,
+    // visitGlobalValue will complain on appending non-array.
+    if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
+      const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+      const PointerType *FuncPtrTy =
+          FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+      Assert1(STy && STy->getNumElements() == 2 &&
+              STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+              STy->getTypeAtIndex(1) == FuncPtrTy,
+              "wrong type for intrinsic global variable", &GV);
+    }
+  }
+
   visitGlobalValue(GV);
 }
 
@@ -826,30 +843,10 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
     Assert2(N == 0,
             "Found return instr that returns non-void in Function of void "
             "return type!", &RI, F->getReturnType());
-  else if (N == 1 && F->getReturnType() == RI.getOperand(0)->getType()) {
-    // Exactly one return value and it matches the return type. Good.
-  } else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
-    // The return type is a struct; check for multiple return values.
-    Assert2(STy->getNumElements() == N,
-            "Incorrect number of return values in ret instruction!",
-            &RI, F->getReturnType());
-    for (unsigned i = 0; i != N; ++i)
-      Assert2(STy->getElementType(i) == RI.getOperand(i)->getType(),
-              "Function return type does not match operand "
-              "type of return inst!", &RI, F->getReturnType());
-  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(F->getReturnType())) {
-    // The return type is an array; check for multiple return values.
-    Assert2(ATy->getNumElements() == N,
-            "Incorrect number of return values in ret instruction!",
-            &RI, F->getReturnType());
-    for (unsigned i = 0; i != N; ++i)
-      Assert2(ATy->getElementType() == RI.getOperand(i)->getType(),
-              "Function return type does not match operand "
-              "type of return inst!", &RI, F->getReturnType());
-  } else {
-    CheckFailed("Function return type does not match operand "
-                "type of return inst!", &RI, F->getReturnType());
-  }
+  else
+    Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+            "Function return type does not match operand "
+            "type of return inst!", &RI, F->getReturnType());
 
   // Check to make sure that the return value has necessary properties for
   // terminators...
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 4e61bee..bb75bbd 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -15,7 +15,7 @@ dnl Tell autoconf that this is an LLVM project being configured
 dnl This provides the --with-llvmsrc and --with-llvmobj options
 LLVM_CONFIG_PROJECT($LLVM_ABS_SRC_ROOT,$LLVM_ABS_OBJ_ROOT)
 
-dnl Tell autoconf that the auxilliary files are actually located in
+dnl Tell autoconf that the auxiliary files are actually located in
 dnl the LLVM autoconf directory, not here.
 AC_CONFIG_AUX_DIR($LLVM_SRC/autoconf)
 
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
new file mode 100644
index 0000000..502b91d
--- /dev/null
+++ b/runtime/CMakeLists.txt
@@ -0,0 +1,5 @@
+if( NOT LLVM_BUILD_RUNTIME )
+  set(EXCLUDE_FROM_ALL ON)
+endif()
+
+add_subdirectory(libprofile)
diff --git a/runtime/libprofile/CMakeLists.txt b/runtime/libprofile/CMakeLists.txt
new file mode 100644
index 0000000..414ad00
--- /dev/null
+++ b/runtime/libprofile/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(SOURCES
+  BasicBlockTracing.c
+  CommonProfiling.c
+  GCDAProfiling.c
+  PathProfiling.c
+  EdgeProfiling.c
+  OptimalEdgeProfiling.c
+  Profiling.h
+  )
+
+add_llvm_library( profile_rt-static ${SOURCES} )
+set_target_properties( profile_rt-static
+  PROPERTIES
+  OUTPUT_NAME "profile_rt" )
+
+add_llvm_loadable_module( profile_rt-shared ${SOURCES} )
+set_target_properties( profile_rt-shared
+  PROPERTIES
+  OUTPUT_NAME "profile_rt" )
diff --git a/runtime/libprofile/CommonProfiling.c b/runtime/libprofile/CommonProfiling.c
index 1c1771c..210a5e5 100644
--- a/runtime/libprofile/CommonProfiling.c
+++ b/runtime/libprofile/CommonProfiling.c
@@ -19,7 +19,11 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>
+#else
+#include <io.h>
+#endif
 #include <stdlib.h>
 
 static char *SavedArgs = 0;
diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c
new file mode 100644
index 0000000..2dcf22d
--- /dev/null
+++ b/runtime/libprofile/GCDAProfiling.c
@@ -0,0 +1,152 @@
+/*===- GCDAProfiling.c - Support library for GCDA file emission -----------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|* 
+|*===----------------------------------------------------------------------===*|
+|* 
+|* This file implements the call back routines for the gcov profiling
+|* instrumentation pass. Link against this library when running code through
+|* the -insert-gcov-profiling LLVM pass.
+|*
+|* We emit files in a corrupt version of GCOV's "gcda" file format. These files
+|* are only close enough that LCOV will happily parse them. Anything that lcov
+|* ignores is missing.
+|*
+|* TODO: gcov is multi-process safe by having each exit open the existing file
+|* and append to it. We'd like to achieve that and be thread-safe too.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm/Support/DataTypes.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* #define DEBUG_GCDAPROFILING */
+
+/*
+ * --- GCOV file format I/O primitives ---
+ */
+
+static FILE *output_file = NULL;
+
+static void write_int32(uint32_t i) {
+  fwrite(&i, 4, 1, output_file);
+}
+
+static void write_int64(uint64_t i) {
+  uint32_t lo, hi;
+  lo = i >>  0;
+  hi = i >> 32;
+
+  write_int32(lo);
+  write_int32(hi);
+}
+
+static char *mangle_filename(const char *orig_filename) {
+  /* TODO: handle GCOV_PREFIX_STRIP */
+  const char *prefix;
+  char *filename = 0;
+
+  prefix = getenv("GCOV_PREFIX");
+
+  if (!prefix)
+    return strdup(filename);
+
+  filename = malloc(strlen(prefix) + 1 + strlen(orig_filename) + 1);
+  strcpy(filename, prefix);
+  strcat(filename, "/");
+  strcat(filename, orig_filename);
+
+  return filename;
+}
+
+/*
+ * --- LLVM line counter API ---
+ */
+
+/* A file in this case is a translation unit. Each .o file built with line
+ * profiling enabled will emit to a different file. Only one file may be
+ * started at a time.
+ */
+void llvm_gcda_start_file(const char *orig_filename) {
+  char *filename;
+  filename = mangle_filename(orig_filename);
+  output_file = fopen(filename, "wb");
+
+  /* gcda file, version 404*, stamp LLVM. */
+  fwrite("adcg*404MVLL", 12, 1, output_file);
+
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: [%s]\n", orig_filename);
+#endif
+
+  free(filename);
+}
+
+/* Given an array of pointers to counters (counters), increment the n-th one,
+ * where we're also given a pointer to n (predecessor).
+ */
+void llvm_gcda_increment_indirect_counter(uint32_t *predecessor,
+                                          uint64_t **counters) {
+  uint64_t *counter;
+  uint32_t pred;
+
+  pred = *predecessor;
+  if (pred == 0xffffffff)
+    return;
+  counter = counters[pred];
+
+  /* Don't crash if the pred# is out of sync. This can happen due to threads,
+     or because of a TODO in GCOVProfiling.cpp buildEdgeLookupTable(). */
+  if (counter)
+    ++*counter;
+#ifdef DEBUG_GCDAPROFILING
+  else
+    printf("llvmgcda: increment_indirect_counter counters=%x, pred=%u\n",
+           state_table_row, *predecessor);
+#endif
+}
+
+void llvm_gcda_emit_function(uint32_t ident) {
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: function id=%x\n", ident);
+#endif
+
+  /* function tag */  
+  fwrite("\0\0\0\1", 4, 1, output_file);
+  write_int32(2);
+  write_int32(ident);
+  write_int32(0);
+}
+
+void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
+  uint32_t i;
+  /* counter #1 (arcs) tag */
+  fwrite("\0\0\xa1\1", 4, 1, output_file);
+  write_int32(num_counters * 2);
+  for (i = 0; i < num_counters; ++i) {
+    write_int64(counters[i]);
+  }
+
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda:   %u arcs\n", num_counters);
+  for (i = 0; i < num_counters; ++i) {
+    printf("llvmgcda:   %llu\n", (unsigned long long)counters[i]);
+  }
+#endif
+}
+
+void llvm_gcda_end_file() {
+  /* Write out EOF record. */
+  fwrite("\0\0\0\0\0\0\0\0", 8, 1, output_file);
+  fclose(output_file);
+  output_file = NULL;
+
+#ifdef DEBUG_GCDAPROFILING
+  printf("llvmgcda: -----\n");
+#endif
+}
diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile
index 4125af6..eced5e5 100644
--- a/runtime/libprofile/Makefile
+++ b/runtime/libprofile/Makefile
@@ -13,9 +13,9 @@ include $(LEVEL)/Makefile.config
 ifneq ($(strip $(LLVMCC)),)
 BYTECODE_LIBRARY = 1
 endif
-SHARED_LIBRARY = 1
-LOADABLE_MODULE = 1
 LIBRARYNAME = profile_rt
+LINK_LIBS_IN_SHARED = 1
+SHARED_LIBRARY = 1
 EXTRA_DIST = libprofile.exports
 EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/libprofile.exports
 
diff --git a/runtime/libprofile/OptimalEdgeProfiling.c b/runtime/libprofile/OptimalEdgeProfiling.c
index eb7887b..947da53 100644
--- a/runtime/libprofile/OptimalEdgeProfiling.c
+++ b/runtime/libprofile/OptimalEdgeProfiling.c
@@ -26,7 +26,7 @@ static void OptEdgeProfAtExitHandler() {
   /* Note that, although the array has a counter for each edge, not all
    * counters are updated, the ones that are not used are initialised with -1.
    * When loading this information the counters with value -1 have to be
-   * recalculated, it is guranteed that this is possible.
+   * recalculated, it is guaranteed that this is possible.
    */
   write_profiling_data(OptEdgeInfo, ArrayStart, NumElements);
 }
diff --git a/runtime/libprofile/PathProfiling.c b/runtime/libprofile/PathProfiling.c
index 651e63c..2836785 100644
--- a/runtime/libprofile/PathProfiling.c
+++ b/runtime/libprofile/PathProfiling.c
@@ -15,14 +15,22 @@
 
 #include "Profiling.h"
 #include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <sys/types.h>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>
+#else
+#include <io.h>
+#endif
 #include <string.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
 #include <stdio.h>
 
+/* Must use __inline in Microsoft C */
+#if defined(_MSC_VER)
+#define inline __inline
+#endif
+
 /* note that this is used for functions with large path counts,
          but it is unlikely those paths will ALL be executed */
 #define ARBITRARY_HASH_BIN_COUNT 100
@@ -104,8 +112,8 @@ void writeArrayTable(uint32_t fNumber, ftEntry_t* ft, uint32_t* funcCount) {
   }
 }
 
-inline uint32_t hash (uint32_t key) {
-  /* this may benifit from a proper hash function */
+static inline uint32_t hash (uint32_t key) {
+  /* this may benefit from a proper hash function */
   return key%ARBITRARY_HASH_BIN_COUNT;
 }
 
@@ -147,7 +155,8 @@ void writeHashTable(uint32_t functionNumber, pathHashTable_t* hashTable) {
 }
 
 /* Return a pointer to this path's specific path counter */
-inline uint32_t* getPathCounter(uint32_t functionNumber, uint32_t pathNumber) {
+static inline uint32_t* getPathCounter(uint32_t functionNumber,
+                                       uint32_t pathNumber) {
   pathHashTable_t* hashTable;
   pathHashEntry_t* hashEntry;
   uint32_t index = hash(pathNumber);
@@ -214,7 +223,7 @@ void llvm_decrement_path_count (uint32_t functionNumber, uint32_t pathNumber) {
  *      +-----------------+-----------------+
  *
  */
-static void pathProfAtExitHandler() {
+static void pathProfAtExitHandler(void) {
   int outFile = getOutFile();
   uint32_t i;
   uint32_t header[2] = { PathInfo, 0 };
diff --git a/runtime/libprofile/libprofile.exports b/runtime/libprofile/libprofile.exports
index b8057c7..2f25be6 100644
--- a/runtime/libprofile/libprofile.exports
+++ b/runtime/libprofile/libprofile.exports
@@ -5,3 +5,8 @@ llvm_start_basic_block_tracing
 llvm_trace_basic_block
 llvm_increment_path_count
 llvm_decrement_path_count
+llvm_gcda_start_file
+llvm_gcda_increment_indirect_counter
+llvm_gcda_emit_function
+llvm_gcda_emit_arcs
+llvm_gcda_end_file
diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll
new file mode 100644
index 0000000..59725cf
--- /dev/null
+++ b/test/Analysis/BasicAA/intrinsics.ll
@@ -0,0 +1,39 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+
+; BasicAA should prove that these calls don't interfere, since they are
+; IntrArgReadMem and have noalias pointers.
+
+; CHECK:      define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT:   %c = add <8 x i16> %a, %a
+define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
+entry:
+  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  %c = add <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+; CHECK:      define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %q = getelementptr i8* %p, i64 16
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT:   %c = add <8 x i16> %a, %a
+define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
+entry:
+  %q = getelementptr i8* %p, i64 16
+  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+  %c = add <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
diff --git a/test/Analysis/BasicAA/store-promote.ll b/test/Analysis/BasicAA/store-promote.ll
index 33d0f3a..0db805c 100644
--- a/test/Analysis/BasicAA/store-promote.ll
+++ b/test/Analysis/BasicAA/store-promote.ll
@@ -24,7 +24,7 @@ Out:            ; preds = %Loop
         
 ; The Loop block should be empty after the load/store are promoted.
 ; CHECK:     @test1
-; CHECK:        load i32* @B
+; CHECK:        load i32* @A
 ; CHECK:      Loop:
 ; CHECK-NEXT:   br i1 %c, label %Out, label %Loop
 ; CHECK:      Out:
diff --git a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll
deleted file mode 100644
index b73b7f03..0000000
--- a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; RUN: opt < %s -analyze -postdomfrontier \
-; RUN:   -disable-verify
-; ModuleID = '2006-09-26-PostDominanceFrontier.bc'
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-@TOP = external global i64*		; <i64**> [#uses=1]
-@BOT = external global i64*		; <i64**> [#uses=1]
-@str = external global [2 x i8]		; <[2 x i8]*> [#uses=0]
-
-declare void @fopen()
-
-define void @main(i8** %argv) {
-entry:
-	%netSelect.i507 = alloca i64, align 8		; <i64*> [#uses=0]
-	%topStart.i = alloca i64, align 8		; <i64*> [#uses=0]
-	%topEnd.i = alloca i64, align 8		; <i64*> [#uses=0]
-	%botStart.i = alloca i64, align 8		; <i64*> [#uses=0]
-	%botEnd.i = alloca i64, align 8		; <i64*> [#uses=0]
-	%c1.i154 = alloca i32, align 4		; <i32*> [#uses=0]
-	%b1.i155 = alloca i32, align 4		; <i32*> [#uses=0]
-	%t1.i156 = alloca i32, align 4		; <i32*> [#uses=0]
-	%c1.i = alloca i32, align 4		; <i32*> [#uses=0]
-	%b1.i = alloca i32, align 4		; <i32*> [#uses=0]
-	%t1.i = alloca i32, align 4		; <i32*> [#uses=0]
-	%netSelect.i5 = alloca i64, align 8		; <i64*> [#uses=0]
-	%netSelect.i = alloca i64, align 8		; <i64*> [#uses=0]
-	%tmp2.i = getelementptr i8** %argv, i32 1		; <i8**> [#uses=1]
-	%tmp3.i4 = load i8** %tmp2.i		; <i8*> [#uses=0]
-	call void @fopen( )
-	br i1 false, label %DimensionChannel.exit, label %bb.backedge.i
-
-bb.backedge.i:		; preds = %entry
-	ret void
-
-DimensionChannel.exit:		; preds = %entry
-	%tmp13.i137 = malloc i64, i32 0		; <i64*> [#uses=1]
-	%tmp610.i = malloc i64, i32 0		; <i64*> [#uses=1]
-	br label %cond_true.i143
-
-cond_true.i143:		; preds = %cond_true.i143, %DimensionChannel.exit
-	%tmp9.i140 = getelementptr i64* %tmp13.i137, i64 0		; <i64*> [#uses=0]
-	%tmp12.i = getelementptr i64* %tmp610.i, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %bb18.i144, label %cond_true.i143
-
-bb18.i144:		; preds = %cond_true.i143
-	call void @fopen( )
-	%tmp76.i105 = malloc i64, i32 0		; <i64*> [#uses=3]
-	%tmp674.i = malloc i64, i32 0		; <i64*> [#uses=2]
-	%tmp1072.i = malloc i64, i32 0		; <i64*> [#uses=2]
-	%tmp1470.i = malloc i64, i32 0		; <i64*> [#uses=1]
-	br label %cond_true.i114
-
-cond_true.i114:		; preds = %cond_true.i114, %bb18.i144
-	%tmp17.i108 = getelementptr i64* %tmp76.i105, i64 0		; <i64*> [#uses=0]
-	%tmp20.i = getelementptr i64* %tmp674.i, i64 0		; <i64*> [#uses=0]
-	%tmp23.i111 = getelementptr i64* %tmp1470.i, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %cond_true40.i, label %cond_true.i114
-
-cond_true40.i:		; preds = %cond_true40.i, %cond_true.i114
-	%tmp33.i115 = getelementptr i64* %tmp1072.i, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %bb142.i, label %cond_true40.i
-
-cond_next54.i:		; preds = %cond_true76.i
-	%tmp57.i = getelementptr i64* %tmp55.i, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %bb64.i, label %bb69.i
-
-bb64.i:		; preds = %cond_true76.i, %cond_next54.i
-	%tmp67.i117 = getelementptr i64* %tmp76.i105, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %bb114.i, label %cond_true111.i
-
-bb69.i:		; preds = %cond_next54.i
-	br i1 false, label %bb79.i, label %cond_true76.i
-
-cond_true76.i:		; preds = %bb142.i, %bb69.i
-	%tmp48.i = getelementptr i64* %tmp46.i, i64 0		; <i64*> [#uses=0]
-	br i1 false, label %bb64.i, label %cond_next54.i
-
-bb79.i:		; preds = %bb69.i
-	br i1 false, label %bb114.i, label %cond_true111.i
-
-cond_true111.i:		; preds = %bb79.i, %bb64.i
-	%tmp84.i127 = getelementptr i64* %tmp46.i, i64 0		; <i64*> [#uses=0]
-	ret void
-
-bb114.i:		; preds = %bb142.i, %bb79.i, %bb64.i
-	%tmp117.i = getelementptr i64* %tmp76.i105, i64 0		; <i64*> [#uses=0]
-	%tmp132.i131 = getelementptr i64* %tmp674.i, i64 0		; <i64*> [#uses=0]
-	%tmp122.i = getelementptr i64* %tmp1072.i, i64 0		; <i64*> [#uses=0]
-	ret void
-
-bb142.i:		; preds = %cond_true40.i
-	%tmp46.i = load i64** @BOT		; <i64*> [#uses=2]
-	%tmp55.i = load i64** @TOP		; <i64*> [#uses=1]
-	br i1 false, label %bb114.i, label %cond_true76.i
-}
diff --git a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll
deleted file mode 100644
index 1ec056b..0000000
--- a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; RUN: opt < %s -postdomfrontier -disable-output
-
-define void @SManager() {
-entry:
-	br label %bb.outer
-
-bb.outer:		; preds = %bb193, %entry
-	br label %bb.outer156
-
-bb.loopexit:		; preds = %bb442
-	br label %bb.outer156
-
-bb.outer156:		; preds = %bb.loopexit, %bb.outer
-	br label %bb
-
-bb:		; preds = %bb.backedge, %bb.outer156
-	br i1 false, label %cond_true, label %bb.cond_next_crit_edge
-
-bb.cond_next_crit_edge:		; preds = %bb
-	br label %cond_next
-
-cond_true:		; preds = %bb
-	br label %cond_next
-
-cond_next:		; preds = %cond_true, %bb.cond_next_crit_edge
-	br i1 false, label %cond_next.bb.backedge_crit_edge, label %cond_next107
-
-cond_next.bb.backedge_crit_edge:		; preds = %cond_next
-	br label %bb.backedge
-
-bb.backedge:		; preds = %cond_true112.bb.backedge_crit_edge, %cond_next.bb.backedge_crit_edge
-	br label %bb
-
-cond_next107:		; preds = %cond_next
-	br i1 false, label %cond_true112, label %cond_next197
-
-cond_true112:		; preds = %cond_next107
-	br i1 false, label %cond_true118, label %cond_true112.bb.backedge_crit_edge
-
-cond_true112.bb.backedge_crit_edge:		; preds = %cond_true112
-	br label %bb.backedge
-
-cond_true118:		; preds = %cond_true112
-	br i1 false, label %bb123.preheader, label %cond_true118.bb148_crit_edge
-
-cond_true118.bb148_crit_edge:		; preds = %cond_true118
-	br label %bb148
-
-bb123.preheader:		; preds = %cond_true118
-	br label %bb123
-
-bb123:		; preds = %bb142.bb123_crit_edge, %bb123.preheader
-	br i1 false, label %bb123.bb142_crit_edge, label %cond_next.i57
-
-bb123.bb142_crit_edge:		; preds = %bb123
-	br label %bb142
-
-cond_next.i57:		; preds = %bb123
-	br i1 false, label %cond_true135, label %cond_next.i57.bb142_crit_edge
-
-cond_next.i57.bb142_crit_edge:		; preds = %cond_next.i57
-	br label %bb142
-
-cond_true135:		; preds = %cond_next.i57
-	br label %bb142
-
-bb142:		; preds = %cond_true135, %cond_next.i57.bb142_crit_edge, %bb123.bb142_crit_edge
-	br i1 false, label %bb148.loopexit, label %bb142.bb123_crit_edge
-
-bb142.bb123_crit_edge:		; preds = %bb142
-	br label %bb123
-
-bb148.loopexit:		; preds = %bb142
-	br label %bb148
-
-bb148:		; preds = %bb148.loopexit, %cond_true118.bb148_crit_edge
-	br i1 false, label %bb151.preheader, label %bb148.bb177_crit_edge
-
-bb148.bb177_crit_edge:		; preds = %bb148
-	br label %bb177
-
-bb151.preheader:		; preds = %bb148
-	br label %bb151
-
-bb151:		; preds = %bb171.bb151_crit_edge, %bb151.preheader
-	br i1 false, label %bb151.bb171_crit_edge, label %cond_next.i49
-
-bb151.bb171_crit_edge:		; preds = %bb151
-	br label %bb171
-
-cond_next.i49:		; preds = %bb151
-	br i1 false, label %cond_true164, label %cond_next.i49.bb171_crit_edge
-
-cond_next.i49.bb171_crit_edge:		; preds = %cond_next.i49
-	br label %bb171
-
-cond_true164:		; preds = %cond_next.i49
-	br label %bb171
-
-bb171:		; preds = %cond_true164, %cond_next.i49.bb171_crit_edge, %bb151.bb171_crit_edge
-	br i1 false, label %bb177.loopexit, label %bb171.bb151_crit_edge
-
-bb171.bb151_crit_edge:		; preds = %bb171
-	br label %bb151
-
-bb177.loopexit:		; preds = %bb171
-	br label %bb177
-
-bb177:		; preds = %bb177.loopexit, %bb148.bb177_crit_edge
-	br i1 false, label %bb180.preheader, label %bb177.bb193_crit_edge
-
-bb177.bb193_crit_edge:		; preds = %bb177
-	br label %bb193
-
-bb180.preheader:		; preds = %bb177
-	br label %bb180
-
-bb180:		; preds = %bb180.bb180_crit_edge, %bb180.preheader
-	br i1 false, label %bb193.loopexit, label %bb180.bb180_crit_edge
-
-bb180.bb180_crit_edge:		; preds = %bb180
-	br label %bb180
-
-bb193.loopexit:		; preds = %bb180
-	br label %bb193
-
-bb193:		; preds = %bb193.loopexit, %bb177.bb193_crit_edge
-	br label %bb.outer
-
-cond_next197:		; preds = %cond_next107
-	br i1 false, label %cond_next210, label %cond_true205
-
-cond_true205:		; preds = %cond_next197
-	br i1 false, label %cond_true205.bb213_crit_edge, label %cond_true205.bb299_crit_edge
-
-cond_true205.bb299_crit_edge:		; preds = %cond_true205
-	br label %bb299
-
-cond_true205.bb213_crit_edge:		; preds = %cond_true205
-	br label %bb213
-
-cond_next210:		; preds = %cond_next197
-	br label %bb293
-
-bb213:		; preds = %bb293.bb213_crit_edge, %cond_true205.bb213_crit_edge
-	br i1 false, label %bb213.cond_next290_crit_edge, label %cond_true248
-
-bb213.cond_next290_crit_edge:		; preds = %bb213
-	br label %cond_next290
-
-cond_true248:		; preds = %bb213
-	br i1 false, label %cond_true248.cond_next290_crit_edge, label %cond_true255
-
-cond_true248.cond_next290_crit_edge:		; preds = %cond_true248
-	br label %cond_next290
-
-cond_true255:		; preds = %cond_true248
-	br i1 false, label %cond_true266, label %cond_true255.cond_next271_crit_edge
-
-cond_true255.cond_next271_crit_edge:		; preds = %cond_true255
-	br label %cond_next271
-
-cond_true266:		; preds = %cond_true255
-	br label %cond_next271
-
-cond_next271:		; preds = %cond_true266, %cond_true255.cond_next271_crit_edge
-	br label %cond_next290
-
-cond_next290:		; preds = %cond_next271, %cond_true248.cond_next290_crit_edge, %bb213.cond_next290_crit_edge
-	br label %bb293
-
-bb293:		; preds = %cond_next290, %cond_next210
-	br i1 false, label %bb293.bb213_crit_edge, label %bb293.bb299_crit_edge
-
-bb293.bb299_crit_edge:		; preds = %bb293
-	br label %bb299
-
-bb293.bb213_crit_edge:		; preds = %bb293
-	br label %bb213
-
-bb299:		; preds = %bb293.bb299_crit_edge, %cond_true205.bb299_crit_edge
-	br i1 false, label %bb302.preheader, label %bb299.bb390_crit_edge
-
-bb299.bb390_crit_edge:		; preds = %bb299
-	br label %bb390
-
-bb302.preheader:		; preds = %bb299
-	br label %bb302
-
-bb302:		; preds = %bb384.bb302_crit_edge, %bb302.preheader
-	br i1 false, label %bb302.bb384_crit_edge, label %cond_true339
-
-bb302.bb384_crit_edge:		; preds = %bb302
-	br label %bb384
-
-cond_true339:		; preds = %bb302
-	br i1 false, label %cond_true339.bb384_crit_edge, label %cond_true346
-
-cond_true339.bb384_crit_edge:		; preds = %cond_true339
-	br label %bb384
-
-cond_true346:		; preds = %cond_true339
-	br i1 false, label %cond_true357, label %cond_true346.cond_next361_crit_edge
-
-cond_true346.cond_next361_crit_edge:		; preds = %cond_true346
-	br label %cond_next361
-
-cond_true357:		; preds = %cond_true346
-	br label %cond_next361
-
-cond_next361:		; preds = %cond_true357, %cond_true346.cond_next361_crit_edge
-	br label %bb384
-
-bb384:		; preds = %cond_next361, %cond_true339.bb384_crit_edge, %bb302.bb384_crit_edge
-	br i1 false, label %bb390.loopexit, label %bb384.bb302_crit_edge
-
-bb384.bb302_crit_edge:		; preds = %bb384
-	br label %bb302
-
-bb390.loopexit:		; preds = %bb384
-	br label %bb390
-
-bb390:		; preds = %bb390.loopexit, %bb299.bb390_crit_edge
-	br i1 false, label %bb391.preheader, label %bb390.bb442.preheader_crit_edge
-
-bb390.bb442.preheader_crit_edge:		; preds = %bb390
-	br label %bb442.preheader
-
-bb391.preheader:		; preds = %bb390
-	br label %bb391
-
-bb391:		; preds = %bb413.bb391_crit_edge, %bb391.preheader
-	br i1 false, label %bb391.bb413_crit_edge, label %cond_next404
-
-bb391.bb413_crit_edge:		; preds = %bb391
-	br label %bb413
-
-cond_next404:		; preds = %bb391
-	br i1 false, label %cond_next404.HWrite.exit_crit_edge, label %cond_next.i13
-
-cond_next404.HWrite.exit_crit_edge:		; preds = %cond_next404
-	br label %HWrite.exit
-
-cond_next.i13:		; preds = %cond_next404
-	br i1 false, label %cond_next.i13.cond_next13.i_crit_edge, label %cond_true12.i
-
-cond_next.i13.cond_next13.i_crit_edge:		; preds = %cond_next.i13
-	br label %cond_next13.i
-
-cond_true12.i:		; preds = %cond_next.i13
-	br label %cond_next13.i
-
-cond_next13.i:		; preds = %cond_true12.i, %cond_next.i13.cond_next13.i_crit_edge
-	br i1 false, label %cond_next13.i.bb.i22_crit_edge, label %cond_next43.i
-
-cond_next13.i.bb.i22_crit_edge:		; preds = %cond_next13.i
-	br label %bb.i22
-
-cond_next43.i:		; preds = %cond_next13.i
-	br i1 false, label %cond_next43.i.bb.i22_crit_edge, label %bb60.i
-
-cond_next43.i.bb.i22_crit_edge:		; preds = %cond_next43.i
-	br label %bb.i22
-
-bb.i22:		; preds = %cond_next43.i.bb.i22_crit_edge, %cond_next13.i.bb.i22_crit_edge
-	br label %bb413
-
-bb60.i:		; preds = %cond_next43.i
-	br i1 false, label %bb60.i.HWrite.exit_crit_edge, label %cond_true81.i
-
-bb60.i.HWrite.exit_crit_edge:		; preds = %bb60.i
-	br label %HWrite.exit
-
-cond_true81.i:		; preds = %bb60.i
-	br label %bb413
-
-HWrite.exit:		; preds = %bb60.i.HWrite.exit_crit_edge, %cond_next404.HWrite.exit_crit_edge
-	br label %bb413
-
-bb413:		; preds = %HWrite.exit, %cond_true81.i, %bb.i22, %bb391.bb413_crit_edge
-	br i1 false, label %bb442.preheader.loopexit, label %bb413.bb391_crit_edge
-
-bb413.bb391_crit_edge:		; preds = %bb413
-	br label %bb391
-
-bb442.preheader.loopexit:		; preds = %bb413
-	br label %bb442.preheader
-
-bb442.preheader:		; preds = %bb442.preheader.loopexit, %bb390.bb442.preheader_crit_edge
-	br label %bb442.outer
-
-bb420:		; preds = %bb442
-	br i1 false, label %bb439.loopexit, label %cond_next433
-
-cond_next433:		; preds = %bb420
-	br i1 false, label %cond_next433.HRead.exit.loopexit_crit_edge, label %cond_next.i
-
-cond_next433.HRead.exit.loopexit_crit_edge:		; preds = %cond_next433
-	br label %HRead.exit.loopexit
-
-cond_next.i:		; preds = %cond_next433
-	br i1 false, label %cond_true9.i, label %cond_false223.i
-
-cond_true9.i:		; preds = %cond_next.i
-	switch i32 0, label %cond_false.i [
-		 i32 1, label %cond_true9.i.cond_true15.i_crit_edge
-		 i32 5, label %cond_true9.i.cond_true15.i_crit_edge9
-	]
-
-cond_true9.i.cond_true15.i_crit_edge9:		; preds = %cond_true9.i
-	br label %cond_true15.i
-
-cond_true9.i.cond_true15.i_crit_edge:		; preds = %cond_true9.i
-	br label %cond_true15.i
-
-cond_true15.i:		; preds = %cond_true9.i.cond_true15.i_crit_edge, %cond_true9.i.cond_true15.i_crit_edge9
-	br i1 false, label %cond_true15.i.cond_true44.i_crit_edge, label %cond_true15.i.cond_false49.i_crit_edge
-
-cond_true15.i.cond_false49.i_crit_edge:		; preds = %cond_true15.i
-	br label %cond_false49.i
-
-cond_true15.i.cond_true44.i_crit_edge:		; preds = %cond_true15.i
-	br label %cond_true44.i
-
-cond_false.i:		; preds = %cond_true9.i
-	br i1 false, label %cond_false.i.cond_next39.i_crit_edge, label %cond_true30.i
-
-cond_false.i.cond_next39.i_crit_edge:		; preds = %cond_false.i
-	br label %cond_next39.i
-
-cond_true30.i:		; preds = %cond_false.i
-	br label %cond_next39.i
-
-cond_next39.i:		; preds = %cond_true30.i, %cond_false.i.cond_next39.i_crit_edge
-	br i1 false, label %cond_next39.i.cond_true44.i_crit_edge, label %cond_next39.i.cond_false49.i_crit_edge
-
-cond_next39.i.cond_false49.i_crit_edge:		; preds = %cond_next39.i
-	br label %cond_false49.i
-
-cond_next39.i.cond_true44.i_crit_edge:		; preds = %cond_next39.i
-	br label %cond_true44.i
-
-cond_true44.i:		; preds = %cond_next39.i.cond_true44.i_crit_edge, %cond_true15.i.cond_true44.i_crit_edge
-	br i1 false, label %cond_true44.i.cond_next70.i_crit_edge, label %cond_true44.i.cond_true61.i_crit_edge
-
-cond_true44.i.cond_true61.i_crit_edge:		; preds = %cond_true44.i
-	br label %cond_true61.i
-
-cond_true44.i.cond_next70.i_crit_edge:		; preds = %cond_true44.i
-	br label %cond_next70.i
-
-cond_false49.i:		; preds = %cond_next39.i.cond_false49.i_crit_edge, %cond_true15.i.cond_false49.i_crit_edge
-	br i1 false, label %cond_false49.i.cond_next70.i_crit_edge, label %cond_false49.i.cond_true61.i_crit_edge
-
-cond_false49.i.cond_true61.i_crit_edge:		; preds = %cond_false49.i
-	br label %cond_true61.i
-
-cond_false49.i.cond_next70.i_crit_edge:		; preds = %cond_false49.i
-	br label %cond_next70.i
-
-cond_true61.i:		; preds = %cond_false49.i.cond_true61.i_crit_edge, %cond_true44.i.cond_true61.i_crit_edge
-	br i1 false, label %cond_true61.i.cond_next70.i_crit_edge, label %cond_true67.i
-
-cond_true61.i.cond_next70.i_crit_edge:		; preds = %cond_true61.i
-	br label %cond_next70.i
-
-cond_true67.i:		; preds = %cond_true61.i
-	br label %cond_next70.i
-
-cond_next70.i:		; preds = %cond_true67.i, %cond_true61.i.cond_next70.i_crit_edge, %cond_false49.i.cond_next70.i_crit_edge, %cond_true44.i.cond_next70.i_crit_edge
-	br i1 false, label %cond_true77.i, label %cond_next81.i
-
-cond_true77.i:		; preds = %cond_next70.i
-	br label %bb442.outer.backedge
-
-cond_next81.i:		; preds = %cond_next70.i
-	br i1 false, label %cond_true87.i, label %cond_false94.i
-
-cond_true87.i:		; preds = %cond_next81.i
-	br i1 false, label %cond_true87.i.cond_true130.i_crit_edge, label %cond_true87.i.cond_next135.i_crit_edge
-
-cond_true87.i.cond_next135.i_crit_edge:		; preds = %cond_true87.i
-	br label %cond_next135.i
-
-cond_true87.i.cond_true130.i_crit_edge:		; preds = %cond_true87.i
-	br label %cond_true130.i
-
-cond_false94.i:		; preds = %cond_next81.i
-	switch i32 0, label %cond_false94.i.cond_next125.i_crit_edge [
-		 i32 1, label %cond_false94.i.cond_true100.i_crit_edge
-		 i32 5, label %cond_false94.i.cond_true100.i_crit_edge10
-	]
-
-cond_false94.i.cond_true100.i_crit_edge10:		; preds = %cond_false94.i
-	br label %cond_true100.i
-
-cond_false94.i.cond_true100.i_crit_edge:		; preds = %cond_false94.i
-	br label %cond_true100.i
-
-cond_false94.i.cond_next125.i_crit_edge:		; preds = %cond_false94.i
-	br label %cond_next125.i
-
-cond_true100.i:		; preds = %cond_false94.i.cond_true100.i_crit_edge, %cond_false94.i.cond_true100.i_crit_edge10
-	br i1 false, label %cond_true107.i, label %cond_true100.i.cond_next109.i_crit_edge
-
-cond_true100.i.cond_next109.i_crit_edge:		; preds = %cond_true100.i
-	br label %cond_next109.i
-
-cond_true107.i:		; preds = %cond_true100.i
-	br label %cond_next109.i
-
-cond_next109.i:		; preds = %cond_true107.i, %cond_true100.i.cond_next109.i_crit_edge
-	br i1 false, label %cond_next109.i.cond_next125.i_crit_edge, label %cond_true116.i
-
-cond_next109.i.cond_next125.i_crit_edge:		; preds = %cond_next109.i
-	br label %cond_next125.i
-
-cond_true116.i:		; preds = %cond_next109.i
-	br label %cond_next125.i
-
-cond_next125.i:		; preds = %cond_true116.i, %cond_next109.i.cond_next125.i_crit_edge, %cond_false94.i.cond_next125.i_crit_edge
-	br i1 false, label %cond_next125.i.cond_true130.i_crit_edge, label %cond_next125.i.cond_next135.i_crit_edge
-
-cond_next125.i.cond_next135.i_crit_edge:		; preds = %cond_next125.i
-	br label %cond_next135.i
-
-cond_next125.i.cond_true130.i_crit_edge:		; preds = %cond_next125.i
-	br label %cond_true130.i
-
-cond_true130.i:		; preds = %cond_next125.i.cond_true130.i_crit_edge, %cond_true87.i.cond_true130.i_crit_edge
-	br label %cond_next135.i
-
-cond_next135.i:		; preds = %cond_true130.i, %cond_next125.i.cond_next135.i_crit_edge, %cond_true87.i.cond_next135.i_crit_edge
-	br i1 false, label %cond_true142.i, label %cond_next135.i.cond_next149.i_crit_edge
-
-cond_next135.i.cond_next149.i_crit_edge:		; preds = %cond_next135.i
-	br label %cond_next149.i
-
-cond_true142.i:		; preds = %cond_next135.i
-	br label %cond_next149.i
-
-cond_next149.i:		; preds = %cond_true142.i, %cond_next135.i.cond_next149.i_crit_edge
-	br i1 false, label %cond_true156.i, label %cond_next149.i.cond_next163.i_crit_edge
-
-cond_next149.i.cond_next163.i_crit_edge:		; preds = %cond_next149.i
-	br label %cond_next163.i
-
-cond_true156.i:		; preds = %cond_next149.i
-	br label %cond_next163.i
-
-cond_next163.i:		; preds = %cond_true156.i, %cond_next149.i.cond_next163.i_crit_edge
-	br i1 false, label %cond_true182.i, label %cond_next163.i.cond_next380.i_crit_edge
-
-cond_next163.i.cond_next380.i_crit_edge:		; preds = %cond_next163.i
-	br label %cond_next380.i
-
-cond_true182.i:		; preds = %cond_next163.i
-	br i1 false, label %cond_true182.i.cond_next380.i_crit_edge, label %cond_true196.i
-
-cond_true182.i.cond_next380.i_crit_edge:		; preds = %cond_true182.i
-	br label %cond_next380.i
-
-cond_true196.i:		; preds = %cond_true182.i
-	br i1 false, label %cond_true210.i, label %cond_true196.i.cond_next380.i_crit_edge
-
-cond_true196.i.cond_next380.i_crit_edge:		; preds = %cond_true196.i
-	br label %cond_next380.i
-
-cond_true210.i:		; preds = %cond_true196.i
-	br i1 false, label %cond_true216.i, label %cond_true210.i.cond_next380.i_crit_edge
-
-cond_true210.i.cond_next380.i_crit_edge:		; preds = %cond_true210.i
-	br label %cond_next380.i
-
-cond_true216.i:		; preds = %cond_true210.i
-	br label %cond_next380.i
-
-cond_false223.i:		; preds = %cond_next.i
-	br i1 false, label %cond_true229.i, label %cond_false355.i
-
-cond_true229.i:		; preds = %cond_false223.i
-	br i1 false, label %cond_true229.i.HRead.exit.loopexit_crit_edge, label %cond_next243.i
-
-cond_true229.i.HRead.exit.loopexit_crit_edge:		; preds = %cond_true229.i
-	br label %HRead.exit.loopexit
-
-cond_next243.i:		; preds = %cond_true229.i
-	br i1 false, label %cond_true248.i, label %cond_false255.i
-
-cond_true248.i:		; preds = %cond_next243.i
-	br label %cond_next260.i
-
-cond_false255.i:		; preds = %cond_next243.i
-	br label %cond_next260.i
-
-cond_next260.i:		; preds = %cond_false255.i, %cond_true248.i
-	br i1 false, label %cond_true267.i, label %cond_next273.i
-
-cond_true267.i:		; preds = %cond_next260.i
-	br label %bb442.backedge
-
-bb442.backedge:		; preds = %bb.i, %cond_true267.i
-	br label %bb442
-
-cond_next273.i:		; preds = %cond_next260.i
-	br i1 false, label %cond_true281.i, label %cond_next273.i.cond_next288.i_crit_edge
-
-cond_next273.i.cond_next288.i_crit_edge:		; preds = %cond_next273.i
-	br label %cond_next288.i
-
-cond_true281.i:		; preds = %cond_next273.i
-	br label %cond_next288.i
-
-cond_next288.i:		; preds = %cond_true281.i, %cond_next273.i.cond_next288.i_crit_edge
-	br i1 false, label %cond_true295.i, label %cond_next288.i.cond_next302.i_crit_edge
-
-cond_next288.i.cond_next302.i_crit_edge:		; preds = %cond_next288.i
-	br label %cond_next302.i
-
-cond_true295.i:		; preds = %cond_next288.i
-	br label %cond_next302.i
-
-cond_next302.i:		; preds = %cond_true295.i, %cond_next288.i.cond_next302.i_crit_edge
-	br i1 false, label %cond_next302.i.cond_next380.i_crit_edge, label %cond_true328.i
-
-cond_next302.i.cond_next380.i_crit_edge:		; preds = %cond_next302.i
-	br label %cond_next380.i
-
-cond_true328.i:		; preds = %cond_next302.i
-	br i1 false, label %cond_true343.i, label %cond_true328.i.cond_next380.i_crit_edge
-
-cond_true328.i.cond_next380.i_crit_edge:		; preds = %cond_true328.i
-	br label %cond_next380.i
-
-cond_true343.i:		; preds = %cond_true328.i
-	br i1 false, label %cond_true349.i, label %cond_true343.i.cond_next380.i_crit_edge
-
-cond_true343.i.cond_next380.i_crit_edge:		; preds = %cond_true343.i
-	br label %cond_next380.i
-
-cond_true349.i:		; preds = %cond_true343.i
-	br label %cond_next380.i
-
-cond_false355.i:		; preds = %cond_false223.i
-	br i1 false, label %cond_false355.i.bb.i_crit_edge, label %cond_next363.i
-
-cond_false355.i.bb.i_crit_edge:		; preds = %cond_false355.i
-	br label %bb.i
-
-cond_next363.i:		; preds = %cond_false355.i
-	br i1 false, label %bb377.i, label %cond_next363.i.bb.i_crit_edge
-
-cond_next363.i.bb.i_crit_edge:		; preds = %cond_next363.i
-	br label %bb.i
-
-bb.i:		; preds = %cond_next363.i.bb.i_crit_edge, %cond_false355.i.bb.i_crit_edge
-	br label %bb442.backedge
-
-bb377.i:		; preds = %cond_next363.i
-	br label %cond_next380.i
-
-cond_next380.i:		; preds = %bb377.i, %cond_true349.i, %cond_true343.i.cond_next380.i_crit_edge, %cond_true328.i.cond_next380.i_crit_edge, %cond_next302.i.cond_next380.i_crit_edge, %cond_true216.i, %cond_true210.i.cond_next380.i_crit_edge, %cond_true196.i.cond_next380.i_crit_edge, %cond_true182.i.cond_next380.i_crit_edge, %cond_next163.i.cond_next380.i_crit_edge
-	br i1 false, label %cond_next380.i.HRead.exit_crit_edge, label %cond_true391.i
-
-cond_next380.i.HRead.exit_crit_edge:		; preds = %cond_next380.i
-	br label %HRead.exit
-
-cond_true391.i:		; preds = %cond_next380.i
-	br label %bb442.outer.backedge
-
-bb442.outer.backedge:		; preds = %bb439, %cond_true391.i, %cond_true77.i
-	br label %bb442.outer
-
-HRead.exit.loopexit:		; preds = %cond_true229.i.HRead.exit.loopexit_crit_edge, %cond_next433.HRead.exit.loopexit_crit_edge
-	br label %HRead.exit
-
-HRead.exit:		; preds = %HRead.exit.loopexit, %cond_next380.i.HRead.exit_crit_edge
-	br label %bb439
-
-bb439.loopexit:		; preds = %bb420
-	br label %bb439
-
-bb439:		; preds = %bb439.loopexit, %HRead.exit
-	br label %bb442.outer.backedge
-
-bb442.outer:		; preds = %bb442.outer.backedge, %bb442.preheader
-	br label %bb442
-
-bb442:		; preds = %bb442.outer, %bb442.backedge
-	br i1 false, label %bb420, label %bb.loopexit
-}
-
-define void @Invalidate() {
-entry:
-	br i1 false, label %cond_false, label %cond_true
-
-cond_true:		; preds = %entry
-	br i1 false, label %cond_true40, label %cond_true.cond_next_crit_edge
-
-cond_true.cond_next_crit_edge:		; preds = %cond_true
-	br label %cond_next
-
-cond_true40:		; preds = %cond_true
-	br label %cond_next
-
-cond_next:		; preds = %cond_true40, %cond_true.cond_next_crit_edge
-	br i1 false, label %cond_true68, label %cond_next.cond_next73_crit_edge
-
-cond_next.cond_next73_crit_edge:		; preds = %cond_next
-	br label %cond_next73
-
-cond_true68:		; preds = %cond_next
-	br label %cond_next73
-
-cond_next73:		; preds = %cond_true68, %cond_next.cond_next73_crit_edge
-	br i1 false, label %cond_true91, label %cond_next73.cond_next96_crit_edge
-
-cond_next73.cond_next96_crit_edge:		; preds = %cond_next73
-	br label %cond_next96
-
-cond_true91:		; preds = %cond_next73
-	br label %cond_next96
-
-cond_next96:		; preds = %cond_true91, %cond_next73.cond_next96_crit_edge
-	br i1 false, label %cond_next96.cond_next112_crit_edge, label %cond_true105
-
-cond_next96.cond_next112_crit_edge:		; preds = %cond_next96
-	br label %cond_next112
-
-cond_true105:		; preds = %cond_next96
-	br label %cond_next112
-
-cond_next112:		; preds = %cond_true105, %cond_next96.cond_next112_crit_edge
-	br i1 false, label %cond_next112.cond_next127_crit_edge, label %cond_true119
-
-cond_next112.cond_next127_crit_edge:		; preds = %cond_next112
-	br label %cond_next127
-
-cond_true119:		; preds = %cond_next112
-	br label %cond_next127
-
-cond_next127:		; preds = %cond_true119, %cond_next112.cond_next127_crit_edge
-	br i1 false, label %cond_next141, label %cond_true134
-
-cond_true134:		; preds = %cond_next127
-	br i1 false, label %cond_true134.bb161_crit_edge, label %cond_true134.bb_crit_edge
-
-cond_true134.bb_crit_edge:		; preds = %cond_true134
-	br label %bb
-
-cond_true134.bb161_crit_edge:		; preds = %cond_true134
-	br label %bb161
-
-cond_next141:		; preds = %cond_next127
-	br label %bb154
-
-bb:		; preds = %bb154.bb_crit_edge, %cond_true134.bb_crit_edge
-	br label %bb154
-
-bb154:		; preds = %bb, %cond_next141
-	br i1 false, label %bb154.bb161_crit_edge, label %bb154.bb_crit_edge
-
-bb154.bb_crit_edge:		; preds = %bb154
-	br label %bb
-
-bb154.bb161_crit_edge:		; preds = %bb154
-	br label %bb161
-
-bb161:		; preds = %bb154.bb161_crit_edge, %cond_true134.bb161_crit_edge
-	br i1 false, label %bb161.cond_next201_crit_edge, label %cond_true198
-
-bb161.cond_next201_crit_edge:		; preds = %bb161
-	br label %cond_next201
-
-cond_true198:		; preds = %bb161
-	br label %cond_next201
-
-cond_next201:		; preds = %cond_true198, %bb161.cond_next201_crit_edge
-	br i1 false, label %cond_next212, label %cond_true206
-
-cond_true206:		; preds = %cond_next201
-	br label %UnifiedReturnBlock
-
-cond_false:		; preds = %entry
-	br label %UnifiedReturnBlock
-
-cond_next212:		; preds = %cond_next201
-	br label %UnifiedReturnBlock
-
-UnifiedReturnBlock:		; preds = %cond_next212, %cond_false, %cond_true206
-	ret void
-}
diff --git a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll
deleted file mode 100644
index 767e5db9..0000000
--- a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -postdomfrontier -disable-output
-
-define void @args_out_of_range() {
-entry:
-	br label %bb
-
-bb:		; preds = %bb, %entry
-	br label %bb
-}
-
-define void @args_out_of_range_3() {
-entry:
-	br label %bb
-
-bb:		; preds = %bb, %entry
-	br label %bb
-}
-
-define void @Feq() {
-entry:
-	br i1 false, label %cond_true, label %cond_next
-
-cond_true:		; preds = %entry
-	unreachable
-
-cond_next:		; preds = %entry
-	unreachable
-}
diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll
index d986387..377a84d 100644
--- a/test/Analysis/RegionInfo/next.ll
+++ b/test/Analysis/RegionInfo/next.ll
@@ -32,8 +32,8 @@ __label_000020:                                   ; preds = %__label_002001, %bb
 ; CHECK-NOT: =>
 ; CHECK: [0] entry => <Function Return>
 ; CHECK-NEXT:  [1] __label_002001.outer => __label_000020
-; CHECK-NEXT;      [2] bb197 => bb229
-; CHECK-NEXT;            [3] bb224 => bb229
+; CHECK-NEXT:      [2] bb197 => bb229
+; CHECK-NEXT:            [3] bb224 => bb229
 
 ; STAT: 4 region - The # of regions
 ; STAT: 1 region - The # of simple regions
diff --git a/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll
new file mode 100644
index 0000000..9f17e27
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll
@@ -0,0 +1,34 @@
+; RUN: opt -indvars  %s
+; PR9424: Attempt to use a SCEVCouldNotCompute object!
+; The inner loop computes the Step and Start of the outer loop.
+; Call that Vexit. The outer End value is max(2,Vexit), because
+; the condition "icmp %4 < 2" does not guard the outer loop.
+; SCEV knows that Vexit has range [2,4], so End == Vexit == Start.
+; Now we have ExactBECount == 0. However, MinStart == 2 and MaxEnd == 4.
+; Since the stride is variable and may wrap, we cannot compute
+; MaxBECount. SCEV should override MaxBECount with ExactBECount.
+
+define void @bar() nounwind {
+entry:
+  %. = select i1 undef, i32 2, i32 1
+  br label %"5.preheader"
+
+"4":                                              ; preds = %"5.preheader", %"4"
+  %0 = phi i32 [ 0, %"5.preheader" ], [ %1, %"4" ]
+  %1 = add nsw i32 %0, 1
+  %2 = icmp sgt i32 %., %1
+  br i1 %2, label %"4", label %"9"
+
+"9":                                              ; preds = %"4"
+  %3 = add i32 %6, 1
+  %4 = add i32 %3, %1
+  %5 = icmp slt i32 %4, 2
+  br i1 %5, label %"5.preheader", label %return
+
+"5.preheader":                                    ; preds = %"9", %entry
+  %6 = phi i32 [ 0, %entry ], [ %4, %"9" ]
+  br label %"4"
+
+return:                                           ; preds = %"9"
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll
new file mode 100644
index 0000000..1600d5f
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -analyze -iv-users
+; PR9633: Tests that SCEV handles the mul.i2 recurrence being folded to
+; constant zero.
+
+define signext i8 @func_14(i8 signext %p_18) nounwind readnone ssp {
+entry:
+  br label %for.inc
+
+for.inc:
+  %p_17.addr.012 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+  %add = add nsw i32 %p_17.addr.012, 1
+  br i1 false, label %for.inc, label %for.cond
+
+for.cond:
+  %tobool.i = icmp ult i32 %add, 8192
+  %shl.i = select i1 %tobool.i, i32 13, i32 0
+  %shl.left.i = shl i32 %add, %shl.i
+  %conv.i4 = trunc i32 %shl.left.i to i8
+  br i1 undef, label %for.inc9, label %if.then
+
+for.inc9:
+  %p_18.addr.011 = phi i8 [ %add12, %for.inc9 ], [ %p_18, %for.cond ]
+  %add12 = add i8 %p_18.addr.011, 1
+  %mul.i2 = mul i8 %add12, %conv.i4
+  %mul.i2.lobit = lshr i8 %mul.i2, 7
+  %lor.ext.shr.i = select i1 undef, i8 %mul.i2.lobit, i8 %mul.i2
+  %tobool = icmp eq i8 %lor.ext.shr.i, 0
+  br i1 %tobool, label %for.inc9, label %if.then
+
+if.then:
+  ret i8 0
+
+}
+\ No newline at end of file
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
index 4cd9a6d..8969a5a 100644
--- a/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -19,11 +19,11 @@ bb:                                               ; preds = %bb.nph, %bb1
   %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ]   ; <i32> [#uses=5]
 
 ; CHECK: %1 = sext i32 %i.01 to i64
-; CHECK: -->  {0,+,2}<%bb>
+; CHECK: -->  {0,+,2}<nuw><nsw><%bb>
   %1 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
 
 ; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
-; CHECK: -->  {%d,+,16}<%bb>
+; CHECK: -->  {%d,+,16}<nsw><%bb>
   %2 = getelementptr inbounds double* %d, i64 %1  ; <double*> [#uses=1]
 
   %3 = load double* %2, align 8                   ; <double> [#uses=1]
@@ -33,11 +33,11 @@ bb:                                               ; preds = %bb.nph, %bb1
   %7 = or i32 %i.01, 1                            ; <i32> [#uses=1]
 
 ; CHECK: %8 = sext i32 %7 to i64
-; CHECK: -->  {1,+,2}<%bb>
+; CHECK: -->  {1,+,2}<nuw><nsw><%bb>
   %8 = sext i32 %7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
-; CHECK: {(8 + %q),+,16}<%bb>
+; CHECK: {(8 + %q),+,16}<nsw><%bb>
   %9 = getelementptr inbounds double* %q, i64 %8  ; <double*> [#uses=1]
 
 ; Artificially repeat the above three instructions, this time using
@@ -45,11 +45,11 @@ bb:                                               ; preds = %bb.nph, %bb1
   %t7 = add nsw i32 %i.01, 1                            ; <i32> [#uses=1]
 
 ; CHECK: %t8 = sext i32 %t7 to i64
-; CHECK: -->  {1,+,2}<%bb>
+; CHECK: -->  {1,+,2}<nuw><nsw><%bb>
   %t8 = sext i32 %t7 to i64                         ; <i64> [#uses=1]
 
 ; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
-; CHECK: {(8 + %q),+,16}<%bb>
+; CHECK: {(8 + %q),+,16}<nsw><%bb>
   %t9 = getelementptr inbounds double* %q, i64 %t8  ; <double*> [#uses=1]
 
   %10 = load double* %9, align 8                  ; <double> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index 9d8e2b6..da35a6c 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -35,7 +35,7 @@ bb:		; preds = %bb1, %bb.nph
 bb1:		; preds = %bb
 	%phitmp = sext i32 %tmp8 to i64		; <i64> [#uses=1]
 ; CHECK: %phitmp
-; CHECK-NEXT: -->  {1,+,1}<%bb>
+; CHECK-NEXT: -->  {1,+,1}<nuw><nsw><%bb>
 	%tmp9 = getelementptr double* %p, i64 %phitmp		; <double*> [#uses=1]
 ; CHECK: %tmp9
 ; CHECK-NEXT:  -->  {(8 + %p),+,8}<%bb>
@@ -62,11 +62,11 @@ for.body.lr.ph.i.i:                               ; preds = %entry
 for.body.i.i:                                     ; preds = %for.body.i.i, %for.body.lr.ph.i.i
   %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ]
 ; CHECK: %__first.addr.02.i.i
-; CHECK-NEXT: -->  {%begin,+,4}<nsw><%for.body.i.i>	
+; CHECK-NEXT: -->  {%begin,+,4}<nw><%for.body.i.i>
   store i32 0, i32* %__first.addr.02.i.i, align 4
   %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1
 ; CHECK: %ptrincdec.i.i
-; CHECK-NEXT: -->  {(4 + %begin),+,4}<nsw><%for.body.i.i>
+; CHECK-NEXT: -->  {(4 + %begin),+,4}<nw><%for.body.i.i>
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i
 
@@ -88,7 +88,7 @@ for.body.i.i:                                     ; preds = %entry, %for.body.i.
 ; CHECK: %indvar.i.i
 ; CHECK: {0,+,1}<nuw><nsw><%for.body.i.i>
   %tmp = add nsw i64 %indvar.i.i, 1
-; CHECK: %tmp = 
+; CHECK: %tmp =
 ; CHECK: {1,+,1}<nuw><nsw><%for.body.i.i>
   %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp
 ; CHECK: %ptrincdec.i.i =
@@ -99,8 +99,8 @@ for.body.i.i:                                     ; preds = %entry, %for.body.i.
   store i32 0, i32* %__first.addr.08.i.i, align 4
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
-; CHECK: Loop %for.body.i.i: Unpredictable backedge-taken count. 
-; CHECK: Loop %for.body.i.i: Unpredictable max backedge-taken count.
+; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
+; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
 _ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %for.body.i.i, %entry
   ret void
 }
 \ No newline at end of file
diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll
index 2af794f..d5d3268 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep { -->  \{-128,+,1\}<%bb1>		Exits: 127} | count 5
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 
 ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the
 ; trip count is within range where this is safe.
@@ -13,9 +12,17 @@ bb1.thread:
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ]		; <i64> [#uses=3]
+; CHECK: %i.0.reg2mem.0
+; CHECK-NEXT: -->  {-128,+,1}<%bb1>		Exits: 127
 	%0 = trunc i64 %i.0.reg2mem.0 to i8		; <i8> [#uses=1]
+; CHECK: %0
+; CHECK-NEXT: -->  {-128,+,1}<%bb1>		Exits: 127
 	%1 = trunc i64 %i.0.reg2mem.0 to i9		; <i8> [#uses=1]
+; CHECK: %1
+; CHECK-NEXT: -->  {-128,+,1}<%bb1>		Exits: 127
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
+; CHECK: %2
+; CHECK-NEXT: -->  {-128,+,1}<nsw><%bb1>	Exits: 127
 	%3 = getelementptr double* %x, i64 %2		; <double*> [#uses=1]
 	%4 = load double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
new file mode 100644
index 0000000..8f080e2
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -0,0 +1,27 @@
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+
+; TBAA should prove that these calls don't interfere, since they are
+; IntrArgReadMem and have TBAA metadata.
+
+; CHECK:      define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT:   %c = add <8 x i16> %a, %a
+define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
+entry:
+  %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2
+  call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1
+  %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2
+  %c = add <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
+!0 = metadata !{metadata !"tbaa root", null}
+!1 = metadata !{metadata !"A", metadata !0}
+!2 = metadata !{metadata !"B", metadata !0}
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index 6752bd8..e4e2d3a 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -7,6 +7,8 @@
 ; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
 ; RUN: llvm-as < %s | llvm-dis | \
+; RUN:   not grep {llvm\\.x86\\.sse2\\.loadu}
+; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
 
 declare i32 @llvm.ctpop.i28(i28 %val)
@@ -79,3 +81,13 @@ define void @sh64(<1 x i64> %A, <2 x i32> %B) {
 	%r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B )		; <<1 x i64>> [#uses=0]
 	ret void
 }
+
+declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
+declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone
+define void @test_loadu(i8* %a, double* %b) {
+  %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
+  %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
+  %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
+  ret void
+}
diff --git a/test/Assembler/aggregate-return-single-value.ll b/test/Assembler/aggregate-return-single-value.ll
index 02fb59f..04540b5 100644
--- a/test/Assembler/aggregate-return-single-value.ll
+++ b/test/Assembler/aggregate-return-single-value.ll
@@ -1,14 +1,8 @@
 ; RUN: llvm-as < %s | llvm-dis
 
-define { i32 } @fooa() nounwind {
-  ret i32 0
-}
 define { i32 } @foob() nounwind {
   ret {i32}{ i32 0 }
 }
-define [1 x i32] @fooc() nounwind {
-  ret i32 0
-}
 define [1 x i32] @food() nounwind {
   ret [1 x i32][ i32 0 ]
 }
diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll
index fe23d26..16362ab 100644
--- a/test/Assembler/comment.ll
+++ b/test/Assembler/comment.ll
@@ -7,7 +7,7 @@
 ; BARE: }
 
 @B = external global i32
-; ANNOT: @B = external global i32   ; [#uses=0]
+; ANNOT: @B = external global i32   ; [#uses=0 type=i32*]
 
 define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
@@ -15,6 +15,5 @@ entry:
   ret <4 x i1> %cmp
 }
 
-; ANNOT: %cmp = fcmp olt <4 x float> %a, %b              ; [#uses=1]
-
+; ANNOT: %cmp = fcmp olt <4 x float> %a, %b              ; [#uses=1 type=<4 x i1>]
 
diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll
index 272cd42..feb2d74 100644
--- a/test/Bitcode/neon-intrinsics.ll
+++ b/test/Bitcode/neon-intrinsics.ll
@@ -76,20 +76,13 @@
 ; CHECK: zext <4 x i16>
 ; CHECK-NEXT: sub <4 x i32>
 
-; vmull should be auto-upgraded to multiply with sext/zext
-; (but vmullp should remain an intrinsic)
+; vmull* intrinsics will remain intrinsics
 
 ; CHECK: vmulls8
-; CHECK-NOT: arm.neon.vmulls.v8i16
-; CHECK: sext <8 x i8>
-; CHECK-NEXT: sext <8 x i8>
-; CHECK-NEXT: mul <8 x i16>
+; CHECK: arm.neon.vmulls.v8i16
 
 ; CHECK: vmullu16
-; CHECK-NOT: arm.neon.vmullu.v4i32
-; CHECK: zext <4 x i16>
-; CHECK-NEXT: zext <4 x i16>
-; CHECK-NEXT: mul <4 x i32>
+; CHECK: arm.neon.vmullu.v4i32
 
 ; CHECK: vmullp8
 ; CHECK: arm.neon.vmullp.v8i16
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index b696682..993b6e2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -37,14 +37,32 @@ if(PYTHONINTERP_FOUND)
   foreach(INC_DIR ${INC_DIRS})
     set(IDIRS "${IDIRS} -I${INC_DIR}")
   endforeach()
-  string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
+
+  if( MSVC )
+    # The compiler's path may contain white space. Wrap it:
+    string(REPLACE "<CMAKE_CXX_COMPILER>" "\\\"${CMAKE_CXX_COMPILER}\\\"" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
+    # Eliminate continuation lines from NMake flow. PR9680
+    string(REPLACE "@<<\n"                " "                     TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+    string(REPLACE "\n<<"                 " "                     TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+  else()
+    string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
+  endif()
+
   string(REPLACE "<DEFINES>"            "${DEFS}"               TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
   string(REPLACE "<FLAGS>"              "${CMAKE_CXX_FLAGS}"    TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
-  string(REPLACE "-o"                   ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+  if (MSVC) # PR9680
+    # Eliminate MSVC equivalent of -o
+    string(REPLACE "/Fo<OBJECT>"        ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+    # Eliminate "how to rename program database" argument
+    string(REPLACE "/Fd<TARGET_PDB>"    ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+  else()
+    string(REPLACE "-o"                 ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+  endif(MSVC)
   string(REGEX REPLACE "<[^>]+>"        ""                      TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
   set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}")
   if(NOT MSVC)
     set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++")
+    # MSVC already has /TP to indicate a C++ source file
   endif()
   configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index c31b116..b37de9d 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s  -mtriple=arm-linux-gnueabi  | FileCheck %s
+; RUN: llc < %s  -mtriple=arm-linux-gnueabi | FileCheck %s
+; RUN: llc < %s  -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s
 
 @.str = private constant [1 x i8] zeroinitializer, align 1
 
diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
index d9e1a14..fee8600 100644
--- a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
+++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
@@ -6,7 +6,7 @@ define i32 @bar(i32 %a) nounwind {
 entry:
   %0 = tail call i32 @foo(i32 %a) nounwind ; <i32> [#uses=1]
   %1 = add nsw i32 %0, 3                          ; <i32> [#uses=1]
-; CHECK: ldmia	sp!, {r11, pc}
+; CHECK: pop {r11, pc}
 ; V4: pop
 ; V4-NEXT: mov pc, lr
   ret i32 %1
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index 5ad1c09..df9dbca 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -7,13 +7,13 @@
 define zeroext i8 @t(%struct.foo* %this) noreturn optsize {
 entry:
 ; ARM:       t:
-; ARM:       str r0, [r1], r0
+; ARM:       str r2, [r1], r0
 
 ; THUMB:     t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r0, [r1]
+; THUMB:     str r2, [r1]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
-  store i32 undef, i32* inttoptr (i32 8 to i32*), align 8
+  store i32 0, i32* inttoptr (i32 8 to i32*), align 8
   br i1 undef, label %bb.nph96, label %bb3
 
 bb3:                                              ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index f077d04..25d38ed 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg
-; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot.
+; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_breg
+; Use DW_OP_breg in variable's location expression if the variable is in a stack slot.
 
 %struct.SVal = type { i8*, i32 }
 
diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
index 163c9b0..32d350e 100644
--- a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
+++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
@@ -4,9 +4,9 @@
 ; was being treated as an instruction count.
 
 ; CHECK: push
-; CHECK: ldmia
-; CHECK: ldmia
-; CHECK: ldmia
+; CHECK: pop
+; CHECK: pop
+; CHECK: pop
 
 define i32 @test(i32 %x) {
 entry:
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index 8d7541f..e3c18ce 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -10,7 +10,7 @@ entry:
 ; ARM: bl _foo
 ; ARM: bl _foo
 ; ARM: bl _foo
-; ARM: ldmia sp!, {r7, pc}
+; ARM: pop {r7, pc}
 
 ; THUMB2: t:
 ; THUMB2: push
diff --git a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
deleted file mode 100644
index d5aefbe..0000000
--- a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
+++ /dev/null
@@ -1,100 +0,0 @@
-; RUN: llc  %s -mtriple=armv7-linux-gnueabi -relocation-model=pic -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=PIC01 %s
-
-;; FIXME: Reduce this test further, or even better,
-;; redo as .s -> .o test once ARM AsmParser is working better
-
-; ModuleID = 'large2.pnacl.bc'
-target triple = "armv7-none-linux-gnueabi"
-
-%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
-%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, %struct._reent*, i8*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i32, i32)*, i32 (%struct._reent*, i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._flock_t, %struct._mbstate_t, i32 }
-%struct.__sbuf = type { i8*, i32 }
-%struct.__tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-%struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args* }
-%struct._flock_t = type { i32, i32, i32, i32, i32 }
-%struct._glue = type { %struct._glue*, i32, %struct.__FILE* }
-%struct._mbstate_t = type { i32, %union.anon }
-%struct._misc_reent = type { i8*, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, [8 x i8], i32, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t }
-%struct._mprec = type { %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint** }
-%struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 }
-%struct._rand48 = type { [3 x i16], [3 x i16], i16, i64 }
-%struct._reent = type { %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, i32, i8*, i32, i32, i8*, %struct._mprec*, void (%struct._reent*)*, i32, i32, i8*, %struct._rand48*, %struct.__tm*, i8*, void (i32)**, %struct._atexit*, %struct._atexit, %struct._glue, %struct.__FILE*, %struct._misc_reent*, i8* }
-%union.anon = type { i32 }
-
-@buf = constant [2 x i8] c"x\00", align 4
-@_impure_ptr = external thread_local global %struct._reent*
-@.str = private constant [22 x i8] c"This should fault...\0A\00", align 4
-@.str1 = private constant [40 x i8] c"We're still running. This is not good.\0A\00", align 4
-
-define i32 @main() nounwind {
-entry:
-  %0 = load %struct._reent** @_impure_ptr, align 4
-  %1 = getelementptr inbounds %struct._reent* %0, i32 0, i32 1
-  %2 = load %struct.__FILE** %1, align 4
-  %3 = bitcast %struct.__FILE* %2 to i8*
-  %4 = tail call i32 @fwrite(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 1, i32 21, i8* %3) nounwind
-  %5 = load %struct._reent** @_impure_ptr, align 4
-  %6 = getelementptr inbounds %struct._reent* %5, i32 0, i32 1
-  %7 = load %struct.__FILE** %6, align 4
-  %8 = tail call i32 @fflush(%struct.__FILE* %7) nounwind
-  store i8 121, i8* getelementptr inbounds ([2 x i8]* @buf, i32 0, i32 0), align 4
-  %9 = load %struct._reent** @_impure_ptr, align 4
-  %10 = getelementptr inbounds %struct._reent* %9, i32 0, i32 1
-  %11 = load %struct.__FILE** %10, align 4
-  %12 = bitcast %struct.__FILE* %11 to i8*
-  %13 = tail call i32 @fwrite(i8* getelementptr inbounds ([40 x i8]* @.str1, i32 0, i32 0), i32 1, i32 39, i8* %12) nounwind
-  ret i32 1
-}
-
-
-; PIC01:             Relocation 0x00000000
-; PIC01-NEXT:        'r_offset', 0x0000001c
-; PIC01-NEXT:          'r_sym'
-; PIC01-NEXT:          'r_type', 0x0000001b
-
-
-; PIC01:             Relocation 0x00000001
-; PIC01-NEXT:      'r_offset', 0x00000038
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x0000001b
-
-; PIC01:              Relocation 0x00000002
-; PIC01-NEXT:      'r_offset', 0x00000044
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x0000001b
-
-; PIC01:              Relocation 0x00000003
-; PIC01-NEXT:      'r_offset', 0x00000070
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x0000001b
-
-; PIC01:              Relocation 0x00000004
-; PIC01-NEXT:      'r_offset', 0x0000007c
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x00000019
-
-
-; PIC01:              Relocation 0x00000005
-; PIC01-NEXT:      'r_offset', 0x00000080
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x00000018
-
-; PIC01:              Relocation 0x00000006
-; PIC01-NEXT:      'r_offset', 0x00000084
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x00000068
-
-; PIC01:              Relocation 0x00000007
-; PIC01-NEXT:      'r_offset', 0x00000088
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x0000001a
-
-; PIC01:              Relocation 0x00000008
-; PIC01-NEXT:      'r_offset', 0x0000008c
-; PIC01-NEXT:        'r_sym'
-; PIC01-NEXT:        'r_type', 0x00000018
-
-declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind
-
-declare i32 @fflush(%struct.__FILE* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index eaa34e7..69d4a14 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -10,12 +10,12 @@
 @STRIDE = internal global i32 8
 
 ; ASM:          .type   array00,%object         @ @array00
-; ASM-NEXT:     .lcomm  array00,80              @ @array00
+; ASM-NEXT:     .lcomm  array00,80
 ; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
 
 
 
-; OBJ:          Section 0x00000003
+; OBJ:          Section 0x00000004
 ; OBJ-NEXT:     '.bss'
 
 ; OBJ:          'array00'
@@ -24,7 +24,7 @@
 ; OBJ-NEXT:     'st_bind', 0x00000000
 ; OBJ-NEXT:     'st_type', 0x00000001
 ; OBJ-NEXT:     'st_other', 0x00000000
-; OBJ-NEXT:     'st_shndx', 0x00000003
+; OBJ-NEXT:     'st_shndx', 0x00000004
 
 define i32 @main(i32 %argc) nounwind {
   %1 = load i32* @sum, align 4
diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
new file mode 100644
index 0000000..81babe0
--- /dev/null
+++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8
+
+; rdar://9117613
+
+%struct.mo = type { i32, %struct.mo_pops* }
+%struct.mo_pops = type { void (%struct.mo*)*, void (%struct.mo*)*, i32 (%struct.mo*, i32*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i64, i32, i32, i32*, i64, i32)*, i32 (%struct.mo*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.mo*, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i32)*, i8* }
+%struct.ui = type { %struct.mo*, i32*, i32, i32*, i32*, i64, i32*, i32*, i32* }
+
+
+define internal fastcc i32 @t(i32* %vp, i32 %withfsize, i64 %filesize) nounwind {
+entry:
+  br i1 undef, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  unreachable
+
+bb1:                                              ; preds = %entry
+  %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind
+  call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false)
+  %1 = getelementptr inbounds %struct.ui* %0, i32 0, i32 0
+  store %struct.mo* undef, %struct.mo** %1, align 4
+  %2 = getelementptr inbounds %struct.ui* %0, i32 0, i32 5
+  %3 = load i64* %2, align 4
+  %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind
+  br i1 undef, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  unreachable
+
+bb3:                                              ; preds = %bb1
+  br i1 undef, label %bb4, label %bb6
+
+bb4:                                              ; preds = %bb3
+  %5 = call i32 @vn_size(i32* %vp, i64* %2, i32* undef) nounwind
+  unreachable
+
+bb6:                                              ; preds = %bb3
+  ret i32 0
+}
+
+declare %struct.ui* @vn_pp_to_ui(i32*)
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+declare i32 @mo_create_nnm(%struct.mo*, i64, i32**)
+
+declare i32 @vn_size(i32*, i64*, i32*)
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
new file mode 100644
index 0000000..ccda281e9
--- /dev/null
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+
+; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
+; rdar://9133587
+
+%struct.Outer = type { i32, [2 x %"struct.Outer::Inner"] }
+%"struct.Outer::Inner" = type { i32, i32, i8, i8 }
+
+@oStruct = external global %struct.Outer, align 4
+
+define void @main() nounwind {
+; CHECK: main:
+; CHECK-NOT: ldrd
+; CHECK: mul
+for.body.lr.ph:
+  br label %for.body
+
+for.body:                                         ; preds = %_Z14printIsNotZeroi.exit17.for.body_crit_edge, %for.body.lr.ph
+  %tmp3 = phi i1 [ false, %for.body.lr.ph ], [ %phitmp27, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ]
+  %i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ]
+  %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
+  %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
+  %inc = add i32 %i.022, 1
+  br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i
+
+if.then.i:                                        ; preds = %for.body
+  unreachable
+
+_Z14printIsNotZeroi.exit:                         ; preds = %for.body
+  %tmp8 = load i32* %x, align 4, !tbaa !0
+  %tmp11 = load i32* %y, align 4, !tbaa !0
+  %mul = mul nsw i32 %tmp11, %tmp8
+  %tobool.i14 = icmp eq i32 %mul, 0
+  br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16
+
+if.then.i16:                                      ; preds = %_Z14printIsNotZeroi.exit
+  unreachable
+
+_Z14printIsNotZeroi.exit17:                       ; preds = %_Z14printIsNotZeroi.exit
+  br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end
+
+_Z14printIsNotZeroi.exit17.for.body_crit_edge:    ; preds = %_Z14printIsNotZeroi.exit17
+  %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
+  %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3
+  %phitmp27 = icmp eq i8 undef, 0
+  br label %for.body
+
+for.end:                                          ; preds = %_Z14printIsNotZeroi.exit17
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"bool", metadata !1}
diff --git a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
new file mode 100644
index 0000000..7c9af6f
--- /dev/null
+++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+
+; subs r4, #1
+; cmp r4, 0
+; bgt
+; cmp cannot be optimized away since it will clear the overflow bit.
+; gt / ge, lt, le conditions all depend on V bit.
+; rdar://9172742
+
+define i32 @t() nounwind {
+; CHECK: t:
+entry:
+  br label %bb2
+
+bb:                                               ; preds = %bb2
+  %0 = tail call i32 @rand() nounwind
+  %1 = icmp eq i32 %0, 50
+  br i1 %1, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = tail call i32 @puts() nounwind
+  %indvar.next = add i32 %indvar, 1
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %entry
+; CHECK: bb2
+; CHECK: subs [[REG:r[0-9]+]], #1
+; CHECK: cmp [[REG]], #0
+; CHECK: bgt
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
+  %tries.0 = sub i32 2147483647, %indvar
+  %tmp1 = icmp sgt i32 %tries.0, 0
+  br i1 %tmp1, label %bb, label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  ret i32 0
+}
+
+declare i32 @rand()
+
+declare i32 @puts() nounwind
diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll
new file mode 100644
index 0000000..a61908f
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+; Tests preRAsched support for VRegCycle interference.
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize {
+entry:
+  %src_copy_start6 = bitcast float* %src_copy_start to i8*
+  %0 = icmp eq i32 %src_width, 0
+  br i1 %0, label %return, label %bb
+
+; Make sure the scheduler schedules all uses of the preincrement
+; induction variable before defining the postincrement value.
+; CHECK: t:
+; CHECK-NOT: mov
+bb:                                               ; preds = %entry, %bb
+  %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+  %tmp = mul i32 %j.05, %src_copy_start_index
+  %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp
+  %src_copy_start_addr.04 = bitcast i8* %uglygep to float*
+  %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05
+  %1 = load float* %src_copy_start_addr.04, align 4
+  store float %1, float* %dst_copy_start_addr.03, align 4
+  %2 = add i32 %j.05, 1
+  %exitcond = icmp eq i32 %2, %src_width
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
new file mode 100644
index 0000000..a9dd971
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+
+; Overly aggressive LICM simply adds copies of constants
+; rdar://9266679
+
+define zeroext i1 @t(i32* nocapture %A, i32 %size, i32 %value) nounwind readonly ssp {
+; CHECK: t:
+entry:
+  br label %for.cond
+
+for.cond:
+  %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i32 %0, %size
+  br i1 %cmp, label %for.body, label %return
+
+for.body:
+; CHECK: %for.body
+; CHECK: movs r{{[0-9]+}}, #1
+  %arrayidx = getelementptr i32* %A, i32 %0
+  %tmp4 = load i32* %arrayidx, align 4
+  %cmp6 = icmp eq i32 %tmp4, %value
+  br i1 %cmp6, label %return, label %for.inc
+
+; CHECK: %for.cond
+; CHECK: movs r{{[0-9]+}}, #0
+
+for.inc:
+  %inc = add i32 %0, 1
+  br label %for.cond
+
+return:
+  %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ]
+  ret i1 %retval.0
+}
diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
new file mode 100644
index 0000000..317be94
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+; CHECK: align 3
+@.v = linker_private unnamed_addr constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 8
+; CHECK: align 2
+@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00"
+; CHECK-NOT: align
+@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1
+@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1
diff --git a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
new file mode 100644
index 0000000..eb23de0
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast
+; Previously we'd crash as out of registers on this input by clobbering all of
+; the aliases.
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+define void @_Z8TestCasev() nounwind ssp {
+entry:
+  %a = alloca float, align 4
+  %tmp = load float* %a, align 4
+  call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0
+  ret void
+}
+
+!0 = metadata !{i32 109}
diff --git a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
new file mode 100644
index 0000000..e712e08
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+; CHECK: _f
+; CHECK-NOT: ands
+; CHECK: cmp
+; CHECK: blxle _g
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %and = and i32 %b, %a
+  %cmp = icmp slt i32 %and, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g(i32 %a, i32 %b) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 %and
+}
+
+declare void @g(...)
diff --git a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
new file mode 100644
index 0000000..5404cf5
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+; CHECK: _f
+; CHECK: adds
+; CHECK-NOT: cmp
+; CHECK: blxeq _h
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %add = add nsw i32 %b, %a
+  %cmp = icmp eq i32 %add, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @h(i32 %a, i32 %b) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 %add
+}
+
+; CHECK: _g
+; CHECK: orrs
+; CHECK-NOT: cmp
+; CHECK: blxeq _h
+
+define i32 @g(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %add = or i32 %b, %a
+  %cmp = icmp eq i32 %add, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @h(i32 %a, i32 %b) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 %add
+}
+
+declare void @h(...)
diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
new file mode 100644
index 0000000..ed7dd03
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+
+; Do not move the umull above previous call which would require use of
+; more callee-saved registers and introduce copies.
+; rdar://9329627
+
+%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
+%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
+
+@FuncPtr = external hidden unnamed_addr global %struct.FF*
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
+@G = external unnamed_addr global i32
+@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
+@.str3 = external hidden unnamed_addr constant [58 x i8], align 4
+
+define i32 @test() nounwind optsize ssp {
+entry:
+; CHECK: test:
+; CHECK: push
+; CHECK-NOT: push
+  %block_size = alloca i32, align 4
+  %block_count = alloca i32, align 4
+  %index_cache = alloca i32, align 4
+  store i32 0, i32* %index_cache, align 4
+  %tmp = load i32* @G, align 4
+  %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
+  switch i32 %tmp1, label %bb8 [
+    i32 0, label %bb
+    i32 536870913, label %bb4
+    i32 536870914, label %bb6
+  ]
+
+bb:
+  %tmp2 = load i32* @G, align 4
+  %tmp4 = icmp eq i32 %tmp2, 0
+  br i1 %tmp4, label %bb1, label %bb8
+
+bb1:
+; CHECK: %bb1
+; CHECK-NOT: umull
+; CHECK: blx _Get
+; CHECK: umull
+; CHECK: blx _foo
+  %tmp5 = load i32* %block_size, align 4
+  %tmp6 = load i32* %block_count, align 4
+  %tmp7 = call %struct.FF* @Get() nounwind
+  store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
+  %tmp10 = zext i32 %tmp6 to i64
+  %tmp11 = zext i32 %tmp5 to i64
+  %tmp12 = mul nsw i64 %tmp10, %tmp11
+  %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+  br label %bb8
+
+bb4:
+  ret i32 0
+
+bb6:
+  ret i32 1
+
+bb8:
+  ret i32 -1
+}
+
+declare i32 @printf(i8*, ...)
+
+declare %struct.FF* @Get()
+
+declare i32 @foo(i8*, i64, i32)
+
+declare i32 @bar(i32, i32, i32)
diff --git a/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll
new file mode 100644
index 0000000..0741049
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios
+
+; If converter was being too cute. It look for root BBs (which don't have
+; successors) and use inverse depth first search to traverse the BBs. However
+; that doesn't work when the CFG has infinite loops. Simply do a linear
+; traversal of all BBs work just fine.
+
+; rdar://9344645
+
+%struct.hc = type { i32, i32, i32, i32 }
+
+define i32 @t(i32 %type) optsize {
+entry:
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  unreachable
+
+if.else:
+  br i1 undef, label %if.then15, label %if.else18
+
+if.then15:
+  unreachable
+
+if.else18:
+  switch i32 %type, label %if.else173 [
+    i32 3, label %if.then115
+    i32 1, label %if.then102
+  ]
+
+if.then102:
+  br i1 undef, label %cond.true10.i, label %t.exit
+
+cond.true10.i:
+  br label %t.exit
+
+t.exit:
+  unreachable
+
+if.then115:
+  br i1 undef, label %if.else163, label %if.else145
+
+if.else145:
+  %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize
+  br label %while.body172
+
+if.else163:
+  %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize
+  br label %while.body172
+
+while.body172:
+  br label %while.body172
+
+if.else173:
+  ret i32 -1
+}
+
+declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize
+
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index d57c159..9589e72 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-apple-darwin10 | FileCheck %s -check-prefix=DARWIN
 
 @a = global i1 true
 ; no alignment
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index c7fcb97..a8b42e6 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i64 %b) {
 define i32 @f2() nounwind optsize {
 ; ELF: f2:
 ; ELF: mov  [[REGISTER:(r[0-9]+)]], #128
-; ELF: str  [[REGISTER]], [sp]
+; ELF: str  [[REGISTER]], [
 ; DARWIN: f2:
 ; DARWIN: mov	r3, #128
 entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 50c638b..0762070 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -23,15 +23,15 @@ tailrecurse:                                      ; preds = %sw.bb, %entry
   %tmp2 = load i8** %scevgep5
   %0 = ptrtoint i8* %tmp2 to i32
 
-; ARM:      ands r12, r12, #3
+; ARM:      ands {{r[0-9]+}}, {{r[0-9]+}}, #3
 ; ARM-NEXT: beq
 
-; THUMB:      movs r5, #3
-; THUMB-NEXT: ands r5, r4
-; THUMB-NEXT: cmp r5, #0
+; THUMB:      movs r[[R0:[0-9]+]], #3
+; THUMB-NEXT: ands r[[R0]], r
+; THUMB-NEXT: cmp r[[R0]], #0
 ; THUMB-NEXT: beq
 
-; T2:      ands r12, r12, #3
+; T2:      ands {{r[0-9]+}}, {{r[0-9]+}}, #3
 ; T2-NEXT: beq
 
   %and = and i32 %0, 3
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 382a183..95edaad 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
@@ -16,7 +18,7 @@ define i8* @rt2() nounwind readnone {
 entry:
 ; CHECK: rt2:
 ; CHECK: {r7, lr}
-; CHECK: ldr r0, [r7]
+; CHECK: ldr r[[R0:[0-9]+]], [r7]
 ; CHECK: ldr r0, [r0]
 ; CHECK: ldr r0, [r0, #4]
   %0 = tail call i8* @llvm.returnaddress(i32 2)
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
new file mode 100644
index 0000000..d0c4f3a
--- /dev/null
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
+; dependency) when it isn't dependent on last CPSR defining instruction.
+; rdar://8928208
+
+define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+ entry:
+; CHECK: t:
+; CHECK: muls r2, r3, r2
+; CHECK-NEXT: mul  r0, r0, r1
+; CHECK-NEXT: muls r0, r2, r0
+  %0 = mul nsw i32 %a, %b
+  %1 = mul nsw i32 %c, %d
+  %2 = mul nsw i32 %0, %1
+  ret i32 %2
+}
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 09f1aae..5533038 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -24,7 +24,7 @@ bb1:		; preds = %bb, %entry
 
 bb18:		; preds = %bb1
 ; CHECK-NOT: bx
-; CHECK: ldmia sp!
+; CHECK: pop
 	ret void
 }
 
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index a77aba0..4dc37aa 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -74,7 +74,7 @@ entry:
 ; CHECKT2: t7:
 ; CHECKT2: blxeq _foo
 ; CHECKT2-NEXT: pop.w
-; CHECKT2-NEXT: b.w _foo
+; CHECKT2-NEXT: b _foo
   br i1 undef, label %bb, label %bb1.lr.ph
 
 bb1.lr.ph:
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index a6a7ed6..9b90408 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -19,3 +19,20 @@ entry:
 	%tmp2 = sub i64 %tmp1, %b
 	ret i64 %tmp2
 }
+
+; add with live carry
+define i64 @f3(i32 %al, i32 %bl) {
+; CHECK: f3:
+; CHECK: adds r
+; CHECK: adcs r
+; CHECK: adc r
+entry:
+        ; unsigned wide add
+        %aw = zext i32 %al to i64
+        %bw = zext i32 %bl to i64
+        %cw = add i64 %aw, %bw
+        ; ch == carry bit
+        %ch = lshr i64 %cw, 32
+	%dw = add i64 %ch, %bw
+	ret i64 %dw
+}
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 845be8c..91ef659 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -cgp-critical-edge-splitting=0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 ; PHI elimination shouldn't break backedge.
 ; rdar://8263994
 
@@ -72,7 +72,7 @@ bb2.preheader:                                    ; preds = %bb3, %bb.nph15
   br i1 %4, label %bb1, label %bb3
 
 ; CHECK: LBB1_[[RET]]: @ %bb5
-; CHECK: ldmia sp!
+; CHECK: pop
 bb5:                                              ; preds = %bb3, %entry
   %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
   ret i32 %sum.1.lcssa
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 542cf02..7893df7 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -disable-cgp-branch-opts | FileCheck %s
 
 define i32 @f1() {
 ; CHECK: f1
@@ -14,31 +14,31 @@ define i32 @f2() {
 
 define i32 @f3() {
 ; CHECK: f3
-; CHECK: mov r0, #1, 24
+; CHECK: mov r0, #1, #24
         ret i32 256
 }
 
 define i32 @f4() {
 ; CHECK: f4
-; CHECK: orr{{.*}}#1, 24
+; CHECK: orr{{.*}}#1, #24
         ret i32 257
 }
 
 define i32 @f5() {
 ; CHECK: f5
-; CHECK: mov r0, #255, 2
+; CHECK: mov r0, #255, #2
         ret i32 -1073741761
 }
 
 define i32 @f6() {
 ; CHECK: f6
-; CHECK: mov r0, #63, 28
+; CHECK: mov r0, #63, #28
         ret i32 1008
 }
 
 define void @f7(i32 %a) {
 ; CHECK: f7
-; CHECK: cmp r0, #1, 16
+; CHECK: cmp r0, #1, #16
         %b = icmp ugt i32 %a, 65536
         br i1 %b, label %r, label %r
 r:
diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll
new file mode 100644
index 0000000..8a865e2
--- /dev/null
+++ b/test/CodeGen/ARM/crash-greedy.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -regalloc=greedy -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -verify-machineinstrs | FileCheck %s
+;
+; ARM tests that crash or fail with the greedy register allocator.
+
+target triple = "thumbv7-apple-darwin"
+
+declare double @exp(double)
+
+; CHECK: remat_subreg
+define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df) nounwind {
+entry:
+  %conv16 = fpext float %lambda to double
+  %mul17 = fmul double %conv16, -1.000000e+00
+  br i1 undef, label %cond.end.us, label %cond.end
+
+cond.end.us:                                      ; preds = %entry
+  unreachable
+
+cond.end:                                         ; preds = %cond.end, %entry
+  %mul = fmul double undef, 0.000000e+00
+  %add = fadd double undef, %mul
+  %add46 = fadd double undef, undef
+  %add75 = fadd double 0.000000e+00, undef
+  br i1 undef, label %for.end, label %cond.end
+
+for.end:                                          ; preds = %cond.end
+  %conv78 = sitofp i32 %z to double
+  %conv83 = fpext float %c to double
+  %mul84 = fmul double %mul17, %conv83
+  %call85 = tail call double @exp(double %mul84) nounwind
+  %mul86 = fmul double %conv78, %call85
+  %add88 = fadd double 0.000000e+00, %mul86
+; CHECK: blx _exp
+  %call100 = tail call double @exp(double %mul84) nounwind
+  %mul101 = fmul double undef, %call100
+  %add103 = fadd double %add46, %mul101
+  %mul111 = fmul double undef, %conv83
+  %mul119 = fmul double %mul111, undef
+  %add121 = fadd double undef, %mul119
+  %div = fdiv double 1.000000e+00, %conv16
+  %div126 = fdiv double %add, %add75
+  %sub = fsub double %div, %div126
+  %div129 = fdiv double %add103, %add88
+  %add130 = fadd double %sub, %div129
+  %conv131 = fptrunc double %add130 to float
+  store float %conv131, float* %ret_f, align 4
+  %mul139 = fmul double %div129, %div129
+  %div142 = fdiv double %add121, %add88
+  %sub143 = fsub double %mul139, %div142
+; %lambda is passed on the stack, and the stack slot load is rematerialized.
+; The rematted load of a float constrains the D register used for the mul.
+; CHECK: vldr
+  %mul146 = fmul float %lambda, %lambda
+  %conv147 = fpext float %mul146 to double
+  %div148 = fdiv double 1.000000e+00, %conv147
+  %sub149 = fsub double %sub143, %div148
+  %conv150 = fptrunc double %sub149 to float
+  store float %conv150, float* %ret_df, align 4
+  ret void
+}
+
+; CHECK: insert_elem
+; This test has a sub-register copy with a kill flag:
+;   %vreg6:ssub_3<def> = COPY %vreg6:ssub_2<kill>; QPR_VFP2:%vreg6
+; The rewriter must do something sensible with that, or the scavenger crashes.
+define void @insert_elem() nounwind {
+entry:
+  br i1 undef, label %if.end251, label %if.then84
+
+if.then84:                                        ; preds = %entry
+  br i1 undef, label %if.end251, label %if.then195
+
+if.then195:                                       ; preds = %if.then84
+  %div = fdiv float 1.000000e+00, undef
+  %vecinit207 = insertelement <4 x float> undef, float %div, i32 1
+  %vecinit208 = insertelement <4 x float> %vecinit207, float 1.000000e+00, i32 2
+  %vecinit209 = insertelement <4 x float> %vecinit208, float 1.000000e+00, i32 3
+  %mul216 = fmul <4 x float> zeroinitializer, %vecinit209
+  store <4 x float> %mul216, <4 x float>* undef, align 16
+  br label %if.end251
+
+if.end251:                                        ; preds = %if.then195, %if.then84, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
new file mode 100644
index 0000000..8c9095e
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s - | FileCheck %s
+; Radar 9309221
+; Test dwarf reg no for d16
+;CHECK: DW_OP_regx
+;CHECK-NEXT: 272
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4
+
+define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !19), !dbg !26
+  tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !20), !dbg !26
+  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !21), !dbg !26
+  %0 = zext i8 %c to i32, !dbg !27
+  %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
+  ret i32 0, !dbg !29
+}
+
+define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !16), !dbg !30
+  tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !17), !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !18), !dbg !30
+  %0 = zext i8 %c to i32, !dbg !31
+  %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
+  ret i32 0, !dbg !33
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !22), !dbg !34
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !23), !dbg !34
+  %0 = sitofp i32 %argc to double, !dbg !35
+  %1 = fadd double %0, 5.555552e+05, !dbg !35
+  tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !24), !dbg !35
+  %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36
+  %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
+  %4 = trunc i32 %argc to i8, !dbg !37
+  %5 = add i8 %4, 97, !dbg !37
+  tail call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !19) nounwind, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !20) nounwind, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i8 %5}, i64 0, metadata !21) nounwind, !dbg !38
+  %6 = zext i8 %5 to i32, !dbg !39
+  %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
+  %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40
+  ret i32 0, !dbg !41
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !9, !10}
+!llvm.dbg.lv.printer = !{!16, !17, !18}
+!llvm.dbg.lv.inlineprinter = !{!19, !20, !21}
+!llvm.dbg.lv.main = !{!22, !23, !24}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
+!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !5, metadata !5, metadata !13}
+!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 589860, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 4, i32 0, metadata !9, null}
+!27 = metadata !{i32 6, i32 0, metadata !28, null}
+!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 7, i32 0, metadata !28, null}
+!30 = metadata !{i32 11, i32 0, metadata !0, null}
+!31 = metadata !{i32 13, i32 0, metadata !32, null}
+!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!33 = metadata !{i32 14, i32 0, metadata !32, null}
+!34 = metadata !{i32 17, i32 0, metadata !10, null}
+!35 = metadata !{i32 19, i32 0, metadata !25, null}
+!36 = metadata !{i32 20, i32 0, metadata !25, null}
+!37 = metadata !{i32 21, i32 0, metadata !25, null}
+!38 = metadata !{i32 4, i32 0, metadata !9, metadata !37}
+!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37}
+!40 = metadata !{i32 22, i32 0, metadata !25, null}
+!41 = metadata !{i32 23, i32 0, metadata !25, null}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
new file mode 100644
index 0000000..e83a83d
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+;CHECK: DW_OP_regx for Q register: D1
+;CHECK-NEXT: byte
+;CHECK-NEXT: byte
+;CHECK-NEXT: DW_OP_piece 8
+;CHECK-NEXT: byte   8
+;CHECK-NEXT: DW_OP_regx for Q register: D2
+;CHECK-NEXT: byte
+;CHECK-NEXT: byte
+;CHECK-NEXT: DW_OP_piece 8
+;CHECK-NEXT: byte   8
+
+@.str = external constant [13 x i8]
+
+declare <4 x float> @test0001(float) nounwind readnone ssp
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9, %entry
+  %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
+  br i1 undef, label %for.end54, label %for.body9, !dbg !44
+
+for.end54:                                        ; preds = %for.body9
+  tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
+  %tmp115 = extractelement <4 x float> %add19, i32 1
+  %conv6.i75 = fpext float %tmp115 to double, !dbg !45
+  %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+  ret i32 0, !dbg !49
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !10, !14}
+!llvm.dbg.lv.test0001 = !{!18}
+!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29}
+!llvm.dbg.lv.printFV = !{!30}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{null}
+!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!34 = metadata !{metadata !35, metadata !37}
+!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = metadata !{i32 79, i32 7, metadata !40, null}
+!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!44 = metadata !{i32 75, i32 5, metadata !42, null}
+!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
+!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!48 = metadata !{i32 95, i32 3, metadata !25, null}
+!49 = metadata !{i32 99, i32 3, metadata !25, null}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
new file mode 100644
index 0000000..548c9bd
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s - | FileCheck %s
+; Radar 9309221
+; Test dwarf reg no for s16
+;CHECK: DW_OP_regx for S register
+;CHECK-NEXT: byte
+;CHECK-NEXT: byte
+;CHECK-NEXT: DW_OP_bit_piece 32 0
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00"
+@.str1 = private unnamed_addr constant [6 x i8] c"point\00"
+
+define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !8), !dbg !24
+  tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !10), !dbg !25
+  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !12), !dbg !26
+  %conv = fpext float %val to double, !dbg !27
+  %conv3 = zext i8 %c to i32, !dbg !27
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
+  ret i32 0, !dbg !29
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind optsize
+
+define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !14), !dbg !30
+  tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !15), !dbg !31
+  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !16), !dbg !32
+  %conv = fpext float %val to double, !dbg !33
+  %conv3 = zext i8 %c to i32, !dbg !33
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
+  ret i32 0, !dbg !35
+}
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !17), !dbg !36
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !18), !dbg !37
+  %conv = sitofp i32 %argc to double, !dbg !38
+  %add = fadd double %conv, 5.555552e+05, !dbg !38
+  %conv1 = fptrunc double %add to float, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !22), !dbg !38
+  %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39
+  %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
+  %add5 = add nsw i32 %argc, 97, !dbg !40
+  %conv6 = trunc i32 %add5 to i8, !dbg !40
+  tail call void @llvm.dbg.value(metadata !{i8* %add.ptr}, i64 0, metadata !8) nounwind, !dbg !41
+  tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !10) nounwind, !dbg !42
+  tail call void @llvm.dbg.value(metadata !{i8 %conv6}, i64 0, metadata !12) nounwind, !dbg !43
+  %conv.i = fpext float %conv1 to double, !dbg !44
+  %conv3.i = and i32 %add5, 255, !dbg !44
+  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
+  %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45
+  ret i32 0, !dbg !46
+}
+
+declare i32 @puts(i8* nocapture) nounwind optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6, !7}
+!llvm.dbg.lv.inlineprinter = !{!8, !10, !12}
+!llvm.dbg.lv.printer = !{!14, !15, !16}
+!llvm.dbg.lv.main = !{!17, !18, !22}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 4, i32 22, metadata !0, null}
+!25 = metadata !{i32 4, i32 33, metadata !0, null}
+!26 = metadata !{i32 4, i32 52, metadata !0, null}
+!27 = metadata !{i32 6, i32 3, metadata !28, null}
+!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 7, i32 3, metadata !28, null}
+!30 = metadata !{i32 11, i32 42, metadata !6, null}
+!31 = metadata !{i32 11, i32 53, metadata !6, null}
+!32 = metadata !{i32 11, i32 72, metadata !6, null}
+!33 = metadata !{i32 13, i32 3, metadata !34, null}
+!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!35 = metadata !{i32 14, i32 3, metadata !34, null}
+!36 = metadata !{i32 17, i32 15, metadata !7, null}
+!37 = metadata !{i32 17, i32 28, metadata !7, null}
+!38 = metadata !{i32 19, i32 31, metadata !23, null}
+!39 = metadata !{i32 20, i32 3, metadata !23, null}
+!40 = metadata !{i32 21, i32 3, metadata !23, null}
+!41 = metadata !{i32 4, i32 22, metadata !0, metadata !40}
+!42 = metadata !{i32 4, i32 33, metadata !0, metadata !40}
+!43 = metadata !{i32 4, i32 52, metadata !0, metadata !40}
+!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40}
+!45 = metadata !{i32 22, i32 3, metadata !23, null}
+!46 = metadata !{i32 23, i32 1, metadata !23, null}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
new file mode 100644
index 0000000..34313aa
--- /dev/null
+++ b/test/CodeGen/ARM/divmod.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+
+define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
+entry:
+; CHECK: foo:
+; CHECK: bl ___divmodsi4
+; CHECK-NOT: bl ___divmodsi4
+  %div = sdiv i32 %x, %y
+  store i32 %div, i32* %P, align 4
+  %rem = srem i32 %x, %y
+  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
+  store i32 %rem, i32* %arrayidx6, align 4
+  ret void
+}
+
+define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
+entry:
+; CHECK: bar:
+; CHECK: bl ___udivmodsi4
+; CHECK-NOT: bl ___udivmodsi4
+  %div = udiv i32 %x, %y
+  store i32 %div, i32* %P, align 4
+  %rem = urem i32 %x, %y
+  %arrayidx6 = getelementptr inbounds i32* %P, i32 1
+  store i32 %rem, i32* %arrayidx6, align 4
+  ret void
+}
+
+; rdar://9280991
+@flags = external unnamed_addr global i32
+@tabsize = external unnamed_addr global i32
+
+define void @do_indent(i32 %cols) nounwind {
+entry:
+; CHECK: do_indent:
+  %0 = load i32* @flags, align 4
+  %1 = and i32 %0, 67108864
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb
+
+bb:
+; CHECK: bl ___divmodsi4
+  %3 = load i32* @tabsize, align 4
+  %4 = srem i32 %cols, %3
+  %5 = sdiv i32 %cols, %3
+  %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false)
+  %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind
+  br label %bb1
+
+bb1:
+  %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ]
+  %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0
+  store i8 0, i8* %8, align 1
+  ret void
+}
+
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
+declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index f03282b..51efe51 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
 ; CORTEXA8: test:
 ; CORTEXA8: 	vabs.f32	d1, d1
 ; CORTEXA9: test:
-; CORTEXA9: 	vabs.f32	s1, s1
+; CORTEXA9: 	vabs.f32	s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 749690e..e35103c 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vadd.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vadd.f32	s0, s1, s0
+; CORTEXA9: 	vadd.f32	s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
new file mode 100644
index 0000000..8de54ad
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %X = alloca <4 x i32>, align 16
+  %Y = alloca <4 x float>, align 16
+  store i32 0, i32* %retval
+  %tmp = load <4 x i32>* %X, align 16
+  call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %__a.addr.i = alloca <4 x i32>, align 16
+  %v.addr = alloca <4 x i32>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  store <4 x i32> %v, <4 x i32>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load <4 x i32>* %v.addr, align 16
+  store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
+  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+  %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
+  %1 = bitcast <16 x i8> %0 to <4 x i32>
+  %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %tmp3 = load <4 x float>** %constants.addr, align 4
+  call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
+  ret void
+}
+
+define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %v.addr = alloca <4 x float>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  %data = alloca i64, align 4
+  store <4 x float> %v, <4 x float>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load i64* %data, align 4
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+  %0 = bitcast i8* %add.ptr to i64*
+  %arrayidx = getelementptr inbounds i64* %0, i32 0
+  store i64 %tmp, i64* %arrayidx
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
new file mode 100644
index 0000000..08dcc64
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -0,0 +1,11 @@
+; RUN: llc -O0 -regalloc=linearscan < %s
+; This isn't exactly a useful set of command-line options, but check that it
+; doesn't crash.  (It was crashing because a register was getting redefined.)
+
+target triple = "thumbv7-apple-macosx10.6.7"
+
+define i32 @f(i32* %x) nounwind ssp {
+  %y = getelementptr inbounds i32* %x, i32 5000
+  %tmp103 = load i32* %y, align 4
+  ret i32 %tmp103
+}
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index 8f58480..2d79674 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -24,7 +24,7 @@ entry:
   store float 0.000000e+00, float* %ztot, align 4
   store float 1.000000e+00, float* %z, align 4
 ; CHECK-LONG: blx     r2
-; CHECK-NORM: blx     _myadd
+; CHECK-NORM: bl      _myadd
   call void @myadd(float* %ztot, float* %z)
   ret i32 0
 }
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index dd806ec..6aad92f 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
-
 define i32 @add(i32 %a, i32 %b) nounwind {
 entry:
   %a.addr = alloca i32, align 4
@@ -13,4 +12,52 @@ entry:
   %tmp1 = load i32* %b.addr
   %add = add nsw i32 %tmp, %tmp1
   ret i32 %add
-}
-\ No newline at end of file
+}
+
+; Check truncate to bool
+define void @test1(i32 %tmp) nounwind {
+entry:
+%tobool = trunc i32 %tmp to i1
+br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+call void @test1(i32 0)
+br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+ret void
+; ARM: test1:
+; ARM: tst r0, #1
+; THUMB: test1:
+; THUMB: tst.w r0, #1
+}
+
+; Check some simple operations with immediates
+define void @test2(i32 %tmp, i32* %ptr) nounwind {
+; THUMB: test2:
+; ARM: test2:
+
+b1:
+  %a = add i32 %tmp, 4096
+  store i32 %a, i32* %ptr
+  br label %b2
+
+; THUMB: add.w {{.*}} #4096
+; ARM: add {{.*}} #1, #20
+
+b2:
+  %b = add i32 %tmp, 4095
+  store i32 %b, i32* %ptr
+  br label %b3
+; THUMB: addw {{.*}} #4095
+; ARM: movw {{.*}} #4095
+; ARM: add
+
+b3:
+  %c = or i32 %tmp, 4
+  store i32 %c, i32* %ptr
+  ret void
+
+; THUMB: orr {{.*}} #4
+; ARM: orr {{.*}} #4
+}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index d30e3eb..f241c26 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -40,5 +40,37 @@ entry:
   ret double %1
 }
 
+; rdar://9059537
+define i32 @test4() ssp {
+entry:
+; SOFT: test4:
+; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00
+; This S-reg must be the first sub-reg of the last D-reg on vbsl.
+; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]]
+; SOFT: vshr.u64 [[REG4]], [[REG4]], #32
+; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000
+; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}}
+  %call80 = tail call double @copysign(double 1.000000e+00, double undef)
+  %conv81 = fptrunc double %call80 to float
+  %tmp88 = bitcast float %conv81 to i32
+  ret i32 %tmp88
+}
+
+; rdar://9287902
+define float @test5() nounwind {
+entry:
+; SOFT: test5:
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
+; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
+; SOFT: vbsl [[REG6]], [[REG7]], 
+  %0 = tail call double (...)* @bar() nounwind
+  %1 = fptrunc double %0 to float
+  %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
+  %3 = fadd float %1, %2
+  ret float %3
+}
+
+declare double @bar(...)
 declare double @copysign(double, double) nounwind
 declare float @copysignf(float, float) nounwind
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 0c31495..31c1ca9 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vdiv.f32	s0, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vdiv.f32	s0, s1, s0
+; CORTEXA9: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index fb83ef6..b63f609 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
@@ -49,3 +51,54 @@ entry:
         %1 = fadd float %0, %acc
 	ret float %1
 }
+
+; It's possible to make use of fp vmla / vmls on Cortex-A9.
+; rdar://8659675
+define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) {
+entry:
+; A8: t4:
+; A8: vmul.f32
+; A8: vmul.f32
+; A8: vadd.f32
+; A8: vadd.f32
+
+; Two vmla with now RAW hazard
+; A9: t4:
+; A9: vmla.f32
+; A9: vmla.f32
+
+; HARD: t4:
+; HARD: vmla.f32 s0, s1, s2
+; HARD: vmla.f32 s3, s1, s4
+  %0 = fmul float %a, %b
+  %1 = fadd float %acc1, %0
+  %2 = fmul float %a, %c
+  %3 = fadd float %acc2, %2
+  store float %1, float* %P1
+  store float %3, float* %P2
+  ret void
+}
+
+define float @t5(float %a, float %b, float %c, float %d, float %e) {
+entry:
+; A8: t5:
+; A8: vmul.f32
+; A8: vmul.f32
+; A8: vadd.f32
+; A8: vadd.f32
+
+; A9: t5:
+; A9: vmla.f32
+; A9: vmul.f32
+; A9: vadd.f32
+
+; HARD: t5:
+; HARD: vmla.f32 s4, s0, s1
+; HARD: vmul.f32 s0, s2, s3
+; HARD: vadd.f32 s0, s4, s0
+  %0 = fmul float %a, %b
+  %1 = fadd float %e, %0
+  %2 = fmul float %c, %d
+  %3 = fadd float %1, %2
+  ret float %3
+}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index ef4e3e5..bc118b8 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmul.f32	s0, s1, s0
+; CORTEXA9: 	vmul.f32	s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 76c8067..9facf20 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
@@ -11,8 +12,8 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t1:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
         %2 = fsub float %1, %acc
@@ -28,8 +29,8 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t2:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
         %2 = fsub float %1, %acc
@@ -45,8 +46,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t3:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
 	%0 = fmul double %a, %b
 	%1 = fsub double -0.0, %0
         %2 = fsub double %1, %acc
@@ -62,8 +63,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t4:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
 	%0 = fmul double %a, %b
 	%1 = fmul double -1.0, %0
         %2 = fsub double %1, %acc
diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll
new file mode 100644
index 0000000..ae02b79
--- /dev/null
+++ b/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s
+
+; CHECK: function1
+; CHECK-NOT: vmov
+define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
+entry:
+  %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
+  ret double %call
+}
+
+declare double @function2(double, double, double, double, double, double)
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index b6e9c3c..8ef45f2 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -51,7 +51,7 @@ entry:
 
 define float @h2() {
 ;CHECK: h2:
-;CHECK: mov r0, #254, 10
+;CHECK: mov r0, #254, #10
 entry:
         ret float 1.000000e+00
 }
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 1ef9f7f..86c06f1 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -5,7 +5,7 @@
 
 define i32 @test1(float %a, float %b) {
 ; VFP2: test1:
-; VFP2: vcvt.s32.f32 s0, s0
+; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
 ; NEON: test1:
 ; NEON: vcvt.s32.f32 d0, d0
 entry:
@@ -16,7 +16,7 @@ entry:
 
 define i32 @test2(float %a, float %b) {
 ; VFP2: test2:
-; VFP2: vcvt.u32.f32 s0, s0
+; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
 ; NEON: test2:
 ; NEON: vcvt.u32.f32 d0, d0
 entry:
@@ -27,7 +27,7 @@ entry:
 
 define float @test3(i32 %a, i32 %b) {
 ; VFP2: test3:
-; VFP2: vcvt.f32.u32 s0, s0
+; VFP2: vcvt.f32.u32 s{{.}}, s{{.}}
 ; NEON: test3:
 ; NEON: vcvt.f32.u32 d0, d0
 entry:
@@ -38,7 +38,7 @@ entry:
 
 define float @test4(i32 %a, i32 %b) {
 ; VFP2: test4:
-; VFP2: vcvt.f32.s32 s0, s0
+; VFP2: vcvt.f32.s32 s{{.}}, s{{.}}
 ; NEON: test4:
 ; NEON: vcvt.f32.s32 d0, d0
 entry:
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 65b921b..7c0dd0e 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -37,7 +37,7 @@ define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
 ; FINITE: t2:
 ; FINITE-NOT: vldr
-; FINITE: ldrd r0, [r0]
+; FINITE: ldrd r0, r1, [r0]
 ; FINITE-NOT: b LBB
 ; FINITE: cmp r0, #0
 ; FINITE: cmpeq r1, #0
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 75428ac..18f87bf 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -9,9 +9,9 @@ entry:
 ; CHECK: t:
 ; CHECK: vpop {d8}
 ; CHECK-NOT: vpopne
-; CHECK: ldmia sp!, {r7, pc}
+; CHECK: pop {r7, pc}
 ; CHECK: vpop {d8}
-; CHECK: ldmia sp!, {r7, pc}
+; CHECK: pop {r7, pc}
   br i1 undef, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index bca2ae3..3615055 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -11,7 +11,7 @@ entry:
 
 define i32 @t1(i32 %a, i32 %b) {
 ; CHECK: t1:
-; CHECK: ldmialt sp!, {r7, pc}
+; CHECK: poplt {r7, pc}
 entry:
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 5edf32f..2327657 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -3,7 +3,7 @@
 define void @foo(i32 %X, i32 %Y) {
 entry:
 ; CHECK: cmpne
-; CHECK: ldmiahi sp!
+; CHECK: pophi
 	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
 	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
 	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 62e1355..476ed4d 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -6,7 +6,7 @@
 define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
 ; CHECK: cmpeq
 ; CHECK: moveq
-; CHECK: ldmiaeq sp!
+; CHECK: popeq
 entry:
 	br label %tailrecurse
 
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 5fdfc4e..ca9a5c6 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -5,7 +5,7 @@
 declare void @abort()
 
 define fastcc void @t(%struct.SString* %word, i8 signext  %c) {
-; CHECK: ldmiane sp!
+; CHECK: popne
 entry:
 	%tmp1 = icmp eq %struct.SString* %word, null		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 0aac9d1..f0ab9dd 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -14,15 +14,15 @@ entry:
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
 ; indirect branch gets duplicated here
 ; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
   br i1 %1, label %bb3, label %bb2
 
 bb2:                                              ; preds = %entry, %bb3
   %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
 ; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 bb3:                                              ; preds = %entry
@@ -42,20 +42,23 @@ L3:                                               ; preds = %L4, %bb2
   br label %L2
 
 L2:                                               ; preds = %L3, %bb2
+; THUMB: muls
   %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ]   ; <i32> [#uses=1]
   %phitmp = mul i32 %res.2, 6                     ; <i32> [#uses=1]
   br label %L1
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; ARM: ldr r1, LCPI
-; ARM: add r1, pc, r1
-; ARM: str r1
-; THUMB: ldr.n r2, LCPI
-; THUMB: add r2, pc
-; THUMB: str r2
-; THUMB2: ldr.n r2, LCPI
-; THUMB2-NEXT: str r2
+; ARM: ldr [[R1:r[0-9]+]], LCPI
+; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
+; ARM: str [[R1b]]
+; THUMB: ldr.n
+; THUMB: add
+; THUMB: ldr.n [[R2:r[0-9]+]], LCPI
+; THUMB: add [[R2]], pc
+; THUMB: str [[R2]]
+; THUMB2: ldr.n [[R2:r[0-9]+]], LCPI
+; THUMB2-NEXT: str{{(.w)?}} [[R2]]
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
 }
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 9f77ad1..9d6eba8 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -6,7 +6,7 @@
 define void @t() nounwind {
 entry:
 ; CHECK: vmov.I64 q15, #0
-; CHECK: vmov.32 d30[0], r0
+; CHECK: vmov.32 d30[0],
 ; CHECK: vmov q8, q15
   %tmp = alloca %struct.int32x4_t, align 16
   call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind
diff --git a/test/CodeGen/ARM/int-to-fp.ll b/test/CodeGen/ARM/int-to-fp.ll
new file mode 100644
index 0000000..889b149
--- /dev/null
+++ b/test/CodeGen/ARM/int-to-fp.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+; CHECK: sint_to_fp
+; CHECK: vmovl.s16
+; CHECK: vcvt.f32.s32
+define <4 x float> @sint_to_fp(<4 x i16> %x) nounwind ssp {
+  %a = sitofp <4 x i16> %x to <4 x float>
+  ret <4 x float> %a
+}
+
+; CHECK: uint_to_fp
+; CHECK: vmovl.u16
+; CHECK: vcvt.f32.u32
+define <4 x float> @uint_to_fp(<4 x i16> %x) nounwind ssp {
+  %a = uitofp <4 x i16> %x to <4 x float>
+  ret <4 x float> %a
+}
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 2f1b85e..db78fd0 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -5,9 +5,9 @@
 
 define i32 @t1() {
 ; CHECK: t1:
-; CHECK: ldmia
+; CHECK: pop
 ; V4T: t1:
-; V4T: ldmia
+; V4T: pop
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
         %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
@@ -16,9 +16,9 @@ define i32 @t1() {
 
 define i32 @t2() {
 ; CHECK: t2:
-; CHECK: ldmia
+; CHECK: pop
 ; V4T: t2:
-; V4T: ldmia
+; V4T: pop
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
         %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
@@ -29,7 +29,7 @@ define i32 @t2() {
 define i32 @t3() {
 ; CHECK: t3:
 ; CHECK: ldmib
-; CHECK: ldmia sp!
+; CHECK: pop
 ; V4T: t3:
 ; V4T: ldmib
 ; V4T: pop
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 895562a..8010f20 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5
+; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI
 ; rdar://r6949835
 
+; Magic ARM pair hints works best with linearscan.
+
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;V6:   ldrd r2, [r2]
+;V6:   ldrd r2, r3, [r2]
 
-;V5:   ldr r3, [r2]
-;V5:   ldr r2, [r2, #4]
+;V5:   ldr r{{[0-9]+}}, [r2]
+;V5:   ldr r{{[0-9]+}}, [r2, #4]
 
-;EABI: ldr r3, [r2]
-;EABI: ldr r2, [r2, #4]
+;EABI: ldr r{{[0-9]+}}, [r2]
+;EABI: ldr r{{[0-9]+}}, [r2, #4]
 
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 74f8d78..e401dca 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -14,14 +14,14 @@ entry:
 
 define i64 @f3() {
 ; CHECK: f3:
-; CHECK: mvn r0, #2, 2
+; CHECK: mvn r0, #2, #2
 entry:
         ret i64 2147483647
 }
 
 define i64 @f4() {
 ; CHECK: f4:
-; CHECK: mov r0, #2, 2
+; CHECK: mov r0, #2, #2
 entry:
         ret i64 2147483648
 }
@@ -29,7 +29,7 @@ entry:
 define i64 @f5() {
 ; CHECK: f5:
 ; CHECK: mvn r0, #0
-; CHECK: mvn r1, #2, 2
+; CHECK: mvn r1, #2, #2
 entry:
         ret i64 9223372036854775807
 }
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 5e4f573..d5aac2e 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -24,9 +24,10 @@ define i32 @f2(i64 %x, i64 %y) {
 ; CHECK: f2
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
-; CHECK-NEXT: subs    r2, r2, #32
+; CHECK-NEXT: sub     r2, r2, #32
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
-; CHECK-NEXT: movge   r0, r1, asr r2
+; CHECK-NEXT: asrge   r0, r1, r2
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -36,9 +37,10 @@ define i32 @f3(i64 %x, i64 %y) {
 ; CHECK: f3
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
-; CHECK-NEXT: subs    r2, r2, #32
+; CHECK-NEXT: sub     r2, r2, #32
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
-; CHECK-NEXT: movge   r0, r1, lsr r2
+; CHECK-NEXT: lsrge   r0, r1, r2
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 1bbb96d..153fd8f 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
-; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
+; RUN: llc < %s | FileCheck %s
 ; This test really wants to check that the resultant "cond_true" block only 
 ; has a single store in it, and that cond_true55 only has code to materialize 
 ; the constant and do a store.  We do *not* want something like this:
@@ -8,6 +7,11 @@
 ;        add r8, r0, r6
 ;        str r10, [r8, #+4]
 ;
+; CHECK: ldr [[R6:r[0-9*]+]], LCP
+; CHECK: cmp {{.*}}, [[R6]]
+; CHECK: ldrle
+; CHECK-NEXT: strle
+
 target triple = "arm-apple-darwin8"
 
 define void @foo(i32* %mc, i32* %mpp, i32* %ip, i32* %dpp, i32* %tpmm, i32 %M, i32* %tpim, i32* %tpdm, i32* %bp, i32* %ms, i32 %xmb) {
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 9882690..c1318ec 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,11 +4,6 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
 ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index ed20c32..5bae037 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
 
-	%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+; The ARM magic hinting works best with linear scan.
+; CHECK: ldrd
+; CHECK: strd
+; CHECK: ldrb
+
+%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 @src = external global %struct.x
 @dst = external global %struct.x
 
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index e337970..de48fee 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
 
 define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
diff --git a/test/CodeGen/ARM/neon_shift.ll b/test/CodeGen/ARM/neon_shift.ll
new file mode 100644
index 0000000..340f220
--- /dev/null
+++ b/test/CodeGen/ARM/neon_shift.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; <rdar://problem/9055897>
+define <4 x i16> @t1(<4 x i32> %a) nounwind {
+entry:
+; CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]*}}, #13
+  %x = tail call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %a, <4 x i32> <i32 -13, i32 -13, i32 -13, i32 -13>)
+  ret <4 x i16> %x
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
new file mode 100644
index 0000000..e670a5b
--- /dev/null
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
+
+; vmov s0, r0 + vmov r0, s0 should have been optimized away.
+; rdar://9104514
+
+; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan
+; to remove it.
+
+define void @t(float %x) nounwind ssp {
+entry:
+; CHECK:     t:
+; CHECK-NOT: vmov
+; CHECK:     bl
+  %0 = bitcast float %x to i32
+  %cmp = icmp ult i32 %0, 2139095039
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @doSomething(float %x) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @doSomething(float)
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 895b27b..95f082a 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -1,10 +1,15 @@
 ; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
-; RUN: llc < %s -march=thumb -mattr=+v7a     | FileCheck %s -check-prefix=THUMB2
-; RUN: llc < %s -march=arm   -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM-MP
+; RUN: llc < %s -march=thumb -mattr=+v7a        | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -march=arm   -mattr=+v7a        | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -march=arm   -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP
 ; rdar://8601536
 
 define void @t1(i8* %ptr) nounwind  {
 entry:
+; ARM: t1:
+; ARM-NOT: pldw [r0]
+; ARM: pld [r0]
+
 ; ARM-MP: t1:
 ; ARM-MP: pldw [r0]
 ; ARM-MP: pld [r0]
@@ -19,8 +24,8 @@ entry:
 
 define void @t2(i8* %ptr) nounwind  {
 entry:
-; ARM-MP: t2:
-; ARM-MP: pld [r0, #1023]
+; ARM: t2:
+; ARM: pld [r0, #1023]
 
 ; THUMB2: t2:
 ; THUMB2: pld [r0, #1023]
@@ -31,8 +36,8 @@ entry:
 
 define void @t3(i32 %base, i32 %offset) nounwind  {
 entry:
-; ARM-MP: t3:
-; ARM-MP: pld [r0, r1, lsr #2]
+; ARM: t3:
+; ARM: pld [r0, r1, lsr #2]
 
 ; THUMB2: t3:
 ; THUMB2: lsrs r1, r1, #2
@@ -46,8 +51,8 @@ entry:
 
 define void @t4(i32 %base, i32 %offset) nounwind  {
 entry:
-; ARM-MP: t4:
-; ARM-MP: pld [r0, r1, lsl #2]
+; ARM: t4:
+; ARM: pld [r0, r1, lsl #2]
 
 ; THUMB2: t4:
 ; THUMB2: pld [r0, r1, lsl #2]
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 53214fd..d350937 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
 ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
 
 %struct.int16x8_t = type { <8 x i16> }
@@ -123,9 +124,9 @@ return1:
 return2:
 ; CHECK:        %return2
 ; CHECK:        vadd.i32
-; CHECK:        vmov q9, q11
+; CHECK:        vmov {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK-NOT:    vmov
-; CHECK:        vst2.32 {d16, d17, d18, d19}
+; CHECK:        vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
   %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
   %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
   %tmp102 = add <4 x i32> %tmp100, %tmp101              ; <<4 x i32>> [#uses=1]
@@ -137,9 +138,10 @@ return2:
 define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK:        t5:
 ; CHECK:        vldmia
-; CHECK:        vmov q9, q8
+; How can FileCheck match Q and D registers? We need a lisp interpreter.
+; CHECK:        vmov {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK-NOT:    vmov
-; CHECK:        vld2.16 {d16[1], d18[1]}, [r0]
+; CHECK:        vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
 ; CHECK-NOT:    vmov
 ; CHECK:        vadd.i16
   %tmp0 = bitcast i16* %A to i8*                  ; <i8*> [#uses=1]
@@ -154,8 +156,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 ; CHECK:        t6:
 ; CHECK:        vldr.64
-; CHECK:        vmov d17, d16
-; CHECK-NEXT:   vld2.8 {d16[1], d17[1]}
+; CHECK:        vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]]
+; CHECK-NEXT:   vld2.8 {d[[D1]][1], d[[D0]][1]}
   %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
   %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
   %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
@@ -169,10 +171,10 @@ entry:
 ; CHECK:        t7:
 ; CHECK:        vld2.32
 ; CHECK:        vst2.32
-; CHECK:        vld1.32 {d16, d17},
-; CHECK:        vmov q9, q8
+; CHECK:        vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}},
+; CHECK:        vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]]
 ; CHECK-NOT:    vmov
-; CHECK:        vuzp.32 q8, q9
+; CHECK:        vuzp.32 q[[Q1]], q[[Q0]]
 ; CHECK:        vst1.32
   %0 = bitcast i32* %iptr to i8*                  ; <i8*> [#uses=2]
   %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
@@ -271,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
 ; CHECK: vmul.f32 q8, q8, d0[0]
-; CHECK: vmov.i32 q9, #0x3F000000
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
 ; CHECK: vadd.f32 q8, q8, q8
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 687bf88..4170ff3 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
 
-define i32 @test1(i32 %X) {
+define i32 @test1(i32 %X) nounwind {
 ; CHECK: test1
 ; CHECK: rev16 r0, r0
         %tmp1 = lshr i32 %X, 8
@@ -16,7 +16,7 @@ define i32 @test1(i32 %X) {
         ret i32 %tmp14
 }
 
-define i32 @test2(i32 %X) {
+define i32 @test2(i32 %X) nounwind {
 ; CHECK: test2
 ; CHECK: revsh r0, r0
         %tmp1 = lshr i32 %X, 8
@@ -28,3 +28,29 @@ define i32 @test2(i32 %X) {
         %tmp5.upgrd.2 = sext i16 %tmp5 to i32
         ret i32 %tmp5.upgrd.2
 }
+
+; rdar://9147637
+define i32 @test3(i16 zeroext %a) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: revsh r0, r0
+  %0 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %1 = sext i16 %0 to i32
+  ret i32 %1
+}
+
+declare i16 @llvm.bswap.i16(i16) nounwind readnone
+
+define i32 @test4(i16 zeroext %a) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: revsh r0, r0
+  %conv = zext i16 %a to i32
+  %shr9 = lshr i16 %a, 8
+  %conv2 = zext i16 %shr9 to i32
+  %shl = shl nuw nsw i32 %conv, 8
+  %or = or i32 %conv2, %shl
+  %sext = shl i32 %or, 16
+  %conv8 = ashr exact i32 %sext, 16
+  ret i32 %conv8
+}
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 578834e..82ed018 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -6,7 +6,7 @@ define i32 @t1(i32 %c) nounwind readnone {
 entry:
 ; ARM: t1:
 ; ARM: mov r1, #101
-; ARM: orr r1, r1, #1, 24
+; ARM: orr r1, r1, #1, #24
 ; ARM: movgt r0, #123
 
 ; ARMT2: t1:
@@ -27,7 +27,7 @@ entry:
 ; ARM: t2:
 ; ARM: mov r0, #123
 ; ARM: movgt r0, #101
-; ARM: orrgt r0, r0, #1, 24
+; ARM: orrgt r0, r0, #1, #24
 
 ; ARMT2: t2:
 ; ARMT2: mov r0, #123
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 1aa0d39..d1493ee 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -90,3 +90,26 @@ define arm_apcscc float @f8(i32 %a) nounwind {
   %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
   ret float %tmp1
 }
+
+; <rdar://problem/9049552>
+; Glue values can only have a single use, but the following test exposed a
+; case where a SELECT was lowered with 2 uses of a comparison, causing the
+; scheduler to assert.
+; CHECK-VFP: f9:
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+define void @f9() optsize {
+entry:
+  %cmp = icmp eq i8* undef, inttoptr (i32 4 to i8*)
+  %conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00
+  %conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00
+  %add = fadd double %conv195, 1.100000e+01
+  %conv196 = fptrunc double %add to float
+  %add201 = fadd float undef, %conv191
+  %tmp484 = bitcast float %conv196 to i32
+  %tmp478 = bitcast float %add201 to i32
+  %tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0
+  %tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1
+  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
+  ret void
+}
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 5dabfc3..4211797 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -4,7 +4,7 @@
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; ARM: t1:
-; ARM: sub r0, r1, #6, 2
+; ARM: sub r0, r1, #6, #2
 ; ARM: movgt r0, r1
 
 ; T2: t1:
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 01e3a92..f0e2d10 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -51,19 +51,19 @@ entry:
 
 declare i8* @malloc(...)
 
-define fastcc void @test4() nounwind {
+define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
-; A8: ldr r1, [r0, r0, lsl #2]
-; A8: str r1, [r0, r0, lsl #2]
+; A8: ldr r2, [r0, r1, lsl #2]
+; A8: str r2, [r0, r1, lsl #2]
 
 ; A9: test4:
-; A9: add r0, r0, r0, lsl #2
+; A9: add r0, r0, r{{[0-9]+}}, lsl #2
 ; A9: ldr r1, [r0]
 ; A9: str r1, [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
-  %2 = sext i16 undef to i32
+  %2 = sext i16 %addr to i32
   %3 = getelementptr inbounds i32* %1, i32 %2
   %4 = load i32* %3, align 4
   %5 = add nsw i32 %4, 1
diff --git a/test/CodeGen/ARM/shuffle.ll b/test/CodeGen/ARM/shuffle.ll
new file mode 100644
index 0000000..7d6be4f
--- /dev/null
+++ b/test/CodeGen/ARM/shuffle.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin"
+
+define <8 x i8> @shuf(<8 x i8> %a) nounwind readnone optsize ssp {
+entry:
+; CHECK: vtbl
+  %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 4, i32 5, i32 0>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @shuf2(<8 x i8> %a, <8 x i8> %b) nounwind readnone optsize ssp {
+entry:
+; CHECK: vtbl
+  %shuffle = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 4, i32 5, i32 8>
+  ret <8 x i8> %shuffle
+}
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index b7ab2e7..686d791 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,16 +1,12 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm -mattr=+v5TE
-; RUN: llc < %s -march=arm -mattr=+v5TE | \
-; RUN:   grep smulbt | count 1
-; RUN: llc < %s -march=arm -mattr=+v5TE | \
-; RUN:   grep smultt | count 1
-; RUN: llc < %s -march=arm -mattr=+v5TE | \
-; RUN:   grep smlabt | count 1
+; RUN: llc < %s -march=arm -mcpu=generic
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
 @y = weak global i16 0          ; <i16*> [#uses=0]
 
 define i32 @f1(i32 %y) {
+; CHECK: f1
+; CHECK: smulbt
         %tmp = load i16* @x             ; <i16> [#uses=1]
         %tmp1 = add i16 %tmp, 2         ; <i16> [#uses=1]
         %tmp2 = sext i16 %tmp1 to i32           ; <i32> [#uses=1]
@@ -20,6 +16,8 @@ define i32 @f1(i32 %y) {
 }
 
 define i32 @f2(i32 %x, i32 %y) {
+; CHECK: f2
+; CHECK: smultt
         %tmp1 = ashr i32 %x, 16         ; <i32> [#uses=1]
         %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
         %tmp4 = mul i32 %tmp3, %tmp1            ; <i32> [#uses=1]
@@ -27,6 +25,8 @@ define i32 @f2(i32 %x, i32 %y) {
 }
 
 define i32 @f3(i32 %a, i16 %x, i32 %y) {
+; CHECK: f3
+; CHECK: smlabt
         %tmp = sext i16 %x to i32               ; <i32> [#uses=1]
         %tmp2 = ashr i32 %y, 16         ; <i32> [#uses=1]
         %tmp3 = mul i32 %tmp2, %tmp             ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 465c7e6..b24f75a 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
+
+; The greedy register allocator uses a single CSR here, invalidating the test.
 
 @b = external global i64*
 
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 81513e2..555b18e 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -12,7 +12,7 @@ define i64 @f1(i64 %a) {
 ; 66846720 = 0x03fc0000
 define i64 @f2(i64 %a) {
 ; CHECK: f2
-; CHECK: subs r0, r0, #255, 14
+; CHECK: subs r0, r0, #255, #14
 ; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 66846720
     ret i64 %tmp
@@ -27,3 +27,12 @@ define i64 @f3(i64 %a) {
    ret i64 %tmp
 }
 
+define i32 @f4(i32 %x) {
+entry:
+; CHECK: f4
+; CHECK: rsbs
+  %sub = sub i32 1, %x
+  %cmp = icmp ugt i32 %sub, 0
+  %sel = select i1 %cmp, i32 1, i32 %sub
+  ret i32 %sel
+}
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index 25093fe..aa88ae0 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
 ; rdar://8819685
 
 @__bar = external hidden global i8*
@@ -12,12 +13,13 @@ entry:
 	%0 = load i8** @__bar, align 4
 	%1 = icmp eq i8* %0, null
 	br i1 %1, label %bb1, label %bb3
+; CHECK: bne
 		
 bb1:
 	store i32 1026, i32* %size, align 4
 	%2 = alloca [1026 x i8], align 1
-; CHECK: mov     r0, sp
-; CHECK: adds    r4, r0, r4
+; CHECK: mov     [[R0:r[0-9]+]], sp
+; CHECK: adds    {{r[0-9]+}}, [[R0]], {{r[0-9]+}}
 	%3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
 	%4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
 	%5 = icmp eq i32 %4, 0
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index b2f6b6e..38842a9 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -1,10 +1,15 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR
+; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC
 ; rdar://7961298
+; rdar://9249183
 
 define void @t() nounwind {
 entry:
-; CHECK: t:
-; CHECK: trap
+; INSTR: t:
+; INSTR: trap
+
+; FUNC: t:
+; FUNC: bl __trap
   call void @llvm.trap()
   unreachable
 }
diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll
index aa7d28a..fa5c016 100644
--- a/test/CodeGen/ARM/umulo-32.ll
+++ b/test/CodeGen/ARM/umulo-32.ll
@@ -12,3 +12,30 @@ define i32 @func(i32 %a) nounwind {
 }
 
 declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+define i32 @f(i32 %argc, i8** %argv) ssp {
+; CHECK: func
+; CHECK: str     r0
+; CHECK: movs    r2
+; CHECK: mov     r1
+; CHECK: mov     r3
+; CHECK: muldi3
+%1 = alloca i32, align 4
+%2 = alloca i32, align 4
+%3 = alloca i8**, align 4
+%m_degree = alloca i32, align 4
+store i32 0, i32* %1
+store i32 %argc, i32* %2, align 4
+store i8** %argv, i8*** %3, align 4
+store i32 10, i32* %m_degree, align 4
+%4 = load i32* %m_degree, align 4
+%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8)
+%6 = extractvalue %umul.ty %5, 1
+%7 = extractvalue %umul.ty %5, 0
+%8 = select i1 %6, i32 -1, i32 %7
+%9 = call noalias i8* @_Znam(i32 %8)
+%10 = bitcast i8* %9 to double*
+ret i32 0
+}
+
+declare noalias i8* @_Znam(i32)
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index b42e11f..a8237c6 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -8,14 +8,14 @@
 define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
 ; GENERIC: t:
-; GENERIC: ldrb r2
-; GENERIC: ldrb r3
-; GENERIC: ldrb r12
-; GENERIC: ldrb r1
-; GENERIC: strb r1
-; GENERIC: strb r12
-; GENERIC: strb r3
-; GENERIC: strb r2
+; GENERIC: ldrb [[R2:r[0-9]+]]
+; GENERIC: ldrb [[R3:r[0-9]+]]
+; GENERIC: ldrb [[R12:r[0-9]+]]
+; GENERIC: ldrb [[R1:r[0-9]+]]
+; GENERIC: strb [[R1]]
+; GENERIC: strb [[R12]]
+; GENERIC: strb [[R3]]
+; GENERIC: strb [[R2]]
 
 ; DARWIN_V6: t:
 ; DARWIN_V6: ldr r1
diff --git a/test/CodeGen/ARM/undef-sext.ll b/test/CodeGen/ARM/undef-sext.ll
new file mode 100644
index 0000000..2c28da3
--- /dev/null
+++ b/test/CodeGen/ARM/undef-sext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+
+; No need to sign-extend undef.
+
+define i32 @t(i32* %a) nounwind {
+entry:
+; CHECK: t:
+; CHECK: ldr r0, [r0]
+; CHECK: bx lr
+  %0 = sext i16 undef to i32
+  %1 = getelementptr inbounds i32* %a, i32 %0
+  %2 = load i32* %1, align 4
+  ret i32 %2
+}
diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll
index 7cb9762..bb40453 100644
--- a/test/CodeGen/ARM/va_arg.ll
+++ b/test/CodeGen/ARM/va_arg.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
 ; Test that we correctly align elements when using va_arg
 
 ; CHECK: test1:
 ; CHECK-NOT: bfc
-; CHECK: add	r0, r0, #7
-; CHECK: bfc	r0, #0, #3
+; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc	[[REG]], #0, #3
 ; CHECK-NOT: bfc
 
 define i64 @test1(i32 %i, ...) nounwind optsize {
@@ -19,8 +19,8 @@ entry:
 
 ; CHECK: test2:
 ; CHECK-NOT: bfc
-; CHECK: add	r0, r0, #7
-; CHECK: bfc	r0, #0, #3
+; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
+; CHECK: bfc	[[REG]], #0, #3
 ; CHECK-NOT:	bfc
 ; CHECK: bx	lr
 
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
new file mode 100644
index 0000000..14e668e
--- /dev/null
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vldr.64
+;CHECK: vldr.64
+;CHECK: vbsl
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = load <8 x i8>* %C
+	%tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+	%tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+	%tmp7 = or <8 x i8> %tmp4, %tmp6
+	ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vldr.64
+;CHECK: vldr.64
+;CHECK: vbsl
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = load <4 x i16>* %C
+	%tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3>
+	%tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4>
+	%tmp7 = or <4 x i16> %tmp4, %tmp6
+	ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vldr.64
+;CHECK: vldr.64
+;CHECK: vbsl
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = load <2 x i32>* %C
+	%tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3>
+	%tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4>
+	%tmp7 = or <2 x i32> %tmp4, %tmp6
+	ret <2 x i32> %tmp7
+}
+
+define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vldr.64
+;CHECK: vldr.64
+;CHECK: vldr.64
+;CHECK: vbsl
+	%tmp1 = load <1 x i64>* %A
+	%tmp2 = load <1 x i64>* %B
+	%tmp3 = load <1 x i64>* %C
+	%tmp4 = and <1 x i64> %tmp1, <i64 3>
+	%tmp6 = and <1 x i64> %tmp3, <i64 -4>
+	%tmp7 = or <1 x i64> %tmp4, %tmp6
+	ret <1 x i64> %tmp7
+}
+
+define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vldmia
+;CHECK: vldmia
+;CHECK: vbsl
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = load <16 x i8>* %B
+	%tmp3 = load <16 x i8>* %C
+	%tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+	%tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+	%tmp7 = or <16 x i8> %tmp4, %tmp6
+	ret <16 x i8> %tmp7
+}
+
+define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vldmia
+;CHECK: vldmia
+;CHECK: vbsl
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = load <8 x i16>* %C
+	%tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+	%tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4>
+	%tmp7 = or <8 x i16> %tmp4, %tmp6
+	ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vldmia
+;CHECK: vldmia
+;CHECK: vbsl
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = load <4 x i32>* %B
+	%tmp3 = load <4 x i32>* %C
+	%tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3>
+	%tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4>
+	%tmp7 = or <4 x i32> %tmp4, %tmp6
+	ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vldmia
+;CHECK: vldmia
+;CHECK: vldmia
+;CHECK: vbsl
+	%tmp1 = load <2 x i64>* %A
+	%tmp2 = load <2 x i64>* %B
+	%tmp3 = load <2 x i64>* %C
+	%tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3>
+	%tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4>
+	%tmp7 = or <2 x i64> %tmp4, %tmp6
+	ret <2 x i64> %tmp7
+}
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index c3c4cb3..2243bac 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vcgts8:
@@ -161,9 +162,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; rdar://7923010
 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vcgt_zext:
-;CHECK: vmov.i32 q10, #0x1
-;CHECK: vcgt.f32 q8
-;CHECK: vand q8, q8, q10
+;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
+;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
+;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 3ab0cfc..81bdc44 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -105,3 +105,21 @@ define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
   store i64 %t1, i64* %ptr
   ret void
 }
+
+; Test trying to do a AND Combine on illegal types.
+define void @andVec(<3 x i8>* %A) nounwind {
+  %tmp = load <3 x i8>* %A, align 4
+  %and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7>
+  store <3 x i8> %and, <3 x i8>* %A
+  ret void
+}
+
+
+; Test trying to do an OR Combine on illegal types.
+define void @orVec(<3 x i8>* %A) nounwind {
+  %tmp = load <3 x i8>* %A, align 4
+  %or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7>
+  store <3 x i8> %or, <3 x i8>* %A
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 55abefe..49a042b 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -125,11 +125,11 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
 ; The actual shuffle code only handles some cases, make sure we check
 ; this rather than blindly emitting a VECTOR_SHUFFLE (infinite
 ; lowering loop can result otherwise).
-define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: test_illegal:
-;CHECK: vst1.8
-       %tmp1 = load <16 x i8>* %A
-       %tmp2 = load <16 x i8>* %B
-       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 25, i32 3, i32 2, i32 2, i32 26>
-       ret <8 x i8> %tmp3
+;CHECK: vst1.16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = load <8 x i16>* %B
+       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
+       ret <8 x i16> %tmp3
 }
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 44a44af..49a6982 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
 
 define void @test(float* %P, double* %D) {
 	%A = load float* %P		; <float> [#uses=1]
@@ -40,9 +41,9 @@ define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
 ;CHECK: vcvt.f64.f32
+;CHECK: vcvt.f32.f64
 	%b = fpext float %a to double		; <double> [#uses=1]
 	%A = load double* %D		; <double> [#uses=1]
-;CHECK: vcvt.f32.f64
 	%B = fptrunc double %A to float		; <float> [#uses=1]
 	store double %b, double* %D
 	store float %B, float* %P
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index c886125..e524395 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK: vld1i8:
@@ -19,7 +20,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
 ;Check for a post-increment updating load. 
 define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
 ;CHECK: vld1i16_update:
-;CHECK: vld1.16 {d16}, [r1]!
+;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
@@ -39,7 +40,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
 ;CHECK: vld1i32_update:
-;CHECK: vld1.32 {d16}, [r2], r1
+;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
@@ -75,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;Check for a post-increment updating load.
 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 ;CHECK: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]!
 	%A = load i8** %ptr
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 	%tmp2 = getelementptr i8* %A, i32 16
@@ -132,8 +133,6 @@ declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
 ; Do not crash if the vld1 result is not used.
 define void @unused_vld1_result() {
 entry:
-;CHECK: unused_vld1_result
-;CHECK: vld1.32
   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) 
   call void @llvm.trap()
   unreachable
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index dde530f..b495319 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -36,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
 ;CHECK: vld3i16_update:
-;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
@@ -121,8 +122,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
 ;Check for a post-increment updating load. 
 define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
 ;CHECK: vld3Qi32_update:
-;CHECK: vld3.32 {d16, d18, d20}, [r1]!
-;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
+;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 770ed07..805aad5 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld1lanei8:
@@ -279,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK: vld3laneQi16_update:
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
@@ -490,7 +491,7 @@ declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x flo
 ; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
 ; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
 ; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
-define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
+define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
 ;CHECK: test_qqqq_regsequence_subreg
 ;CHECK: vld3.16
   %tmp63 = extractvalue [6 x i64] %b, 5
@@ -499,8 +500,12 @@ define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
   %ins67 = or i128 %tmp65, 0
   %tmp78 = bitcast i128 %ins67 to <8 x i16>
   %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
-  call void @llvm.trap()
-  unreachable
+  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0
+  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1
+  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2
+  %tmp6 = add <8 x i16> %tmp3, %tmp4
+  %tmp7 = add <8 x i16> %tmp5, %tmp6
+  ret <8 x i16> %tmp7
 }
 
 declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index ee033ca..1fd6581 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vmuli8:
@@ -158,6 +158,15 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 	ret <8 x i16> %tmp5
 }
 
+define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8_int:
+;CHECK: vmull.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
 define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vmulls16:
 ;CHECK: vmull.s16
@@ -169,6 +178,15 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 	ret <4 x i32> %tmp5
 }
 
+define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16_int:
+;CHECK: vmull.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
 define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vmulls32:
 ;CHECK: vmull.s32
@@ -180,6 +198,15 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 	ret <2 x i64> %tmp5
 }
 
+define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32_int:
+;CHECK: vmull.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
 define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vmullu8:
 ;CHECK: vmull.u8
@@ -191,6 +218,15 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 	ret <8 x i16> %tmp5
 }
 
+define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8_int:
+;CHECK: vmull.u8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	ret <8 x i16> %tmp3
+}
+
 define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vmullu16:
 ;CHECK: vmull.u16
@@ -202,6 +238,15 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 	ret <4 x i32> %tmp5
 }
 
+define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16_int:
+;CHECK: vmull.u16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	ret <4 x i32> %tmp3
+}
+
 define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vmullu32:
 ;CHECK: vmull.u32
@@ -213,6 +258,15 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 	ret <2 x i64> %tmp5
 }
 
+define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32_int:
+;CHECK: vmull.u32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = load <2 x i32>* %B
+	%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	ret <2 x i64> %tmp3
+}
+
 define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vmullp8:
 ;CHECK: vmull.p8
@@ -233,6 +287,15 @@ entry:
   ret <4 x i32> %3
 }
 
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16_int
+; CHECK: vmull.s16 q0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
 define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
 entry:
 ; CHECK: test_vmull_lanes32
@@ -244,6 +307,15 @@ entry:
   ret <2 x i64> %3
 }
 
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32_int
+; CHECK: vmull.s32 q0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
 define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
 entry:
 ; CHECK: test_vmull_laneu16
@@ -255,6 +327,15 @@ entry:
   ret <4 x i32> %3
 }
 
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16_int
+; CHECK: vmull.u16 q0, d0, d1[1]
+  %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+  %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %1
+}
+
 define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
 entry:
 ; CHECK: test_vmull_laneu32
@@ -266,6 +347,23 @@ entry:
   ret <2 x i64> %3
 }
 
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32_int
+; CHECK: vmull.u32 q0, d0, d1[1]
+  %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+  %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
 declare <8 x i16>  @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
 
 
@@ -339,3 +437,58 @@ define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
   %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
   ret <2 x i64> %tmp4
 }
+
+; rdar://9197392
+define void @distribue(i16* %dst, i8* %src, i32 %mul) nounwind {
+entry:
+; CHECK: distribue:
+; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]]
+; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]]
+  %0 = trunc i32 %mul to i8
+  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+  %4 = bitcast <16 x i8> %3 to <2 x double>
+  %5 = extractelement <2 x double> %4, i32 1
+  %6 = bitcast double %5 to <8 x i8>
+  %7 = zext <8 x i8> %6 to <8 x i16>
+  %8 = zext <8 x i8> %2 to <8 x i16>
+  %9 = extractelement <2 x double> %4, i32 0
+  %10 = bitcast double %9 to <8 x i8>
+  %11 = zext <8 x i8> %10 to <8 x i16>
+  %12 = add <8 x i16> %7, %11
+  %13 = mul <8 x i16> %12, %8
+  %14 = bitcast i16* %dst to i8*
+  tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
+  ret void
+}
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
+; Take advantage of the Cortex-A8 multiplier accumulator forward.
+
+%struct.uint8x8_t = type { <8 x i8> }
+
+define void @distribue2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
+entry:
+; CHECK: distribue2
+; CHECK-NOT: vadd.i8
+; CHECK: vmul.i8
+; CHECK: vmla.i8
+  %0 = trunc i32 %mul to i8
+  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+  %4 = bitcast <16 x i8> %3 to <2 x double>
+  %5 = extractelement <2 x double> %4, i32 1
+  %6 = bitcast double %5 to <8 x i8>
+  %7 = extractelement <2 x double> %4, i32 0
+  %8 = bitcast double %7 to <8 x i8>
+  %9 = add <8 x i8> %6, %8
+  %10 = mul <8 x i8> %9, %2
+  %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0
+  store <8 x i8> %10, <8 x i8>* %11, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index d262303..e3372a0 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst3i8:
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
index 3ee5e8d..50fccb4 100644
--- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
+++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
+; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy
 
 ; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
 ; super-register is illegally extended.
diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll
index 42b41b3..87376ef 100644
--- a/test/CodeGen/CellSPU/jumptable.ll
+++ b/test/CodeGen/CellSPU/jumptable.ll
@@ -1,4 +1,4 @@
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
+;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s
 ; This is to check that emitting jumptables doesn't crash llc
 define i32 @test(i32 %param) {
 entry:
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index 03d7ad1..4771752 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -50,3 +50,10 @@ define i32 @load_misaligned( i32* %ptr ){
   %rv = load i32* %ptr, align 2
   ret i32 %rv
 }
+
+define <4 x i32> @load_null_vec( ) {
+;CHECK: lqa
+;CHECK: bi $lr
+	%rv = load <4 x i32>* null
+	ret <4 x i32> %rv
+}
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index e117208..b1219e6 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -3,9 +3,9 @@
 ; RUN: grep roth         %t1.s | count 8
 ; RUN: grep roti.*5      %t1.s | count 1
 ; RUN: grep roti.*27     %t1.s | count 1
-; RUN grep rothi.*5      %t1.s | count 2
-; RUN grep rothi.*11     %t1.s | count 1
-; RUN grep rothi.*,.3    %t1.s | count 1
+; RUN: grep rothi.*5      %t1.s | count 2
+; RUN: grep rothi.*11     %t1.s | count 1
+; RUN: grep rothi.*,.3    %t1.s | count 1
 ; RUN: grep andhi        %t1.s | count 4
 ; RUN: grep shlhi        %t1.s | count 4
 ; RUN: cat %t1.s | FileCheck %s
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 92390ab..c4a5abd 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep {shlh	}  %t1.s | count 9
+; RUN: grep {shlh	}  %t1.s | count 10
 ; RUN: grep {shlhi	}  %t1.s | count 3
-; RUN: grep {shl	}  %t1.s | count 9
+; RUN: grep {shl	}  %t1.s | count 11
 ; RUN: grep {shli	}  %t1.s | count 3
 ; RUN: grep {xshw	}  %t1.s | count 5
 ; RUN: grep {and	}  %t1.s | count 14
@@ -14,15 +14,12 @@
 ; RUN: grep {rotqbyi	}  %t1.s | count 1
 ; RUN: grep {rotqbii	}  %t1.s | count 2
 ; RUN: grep {rotqbybi	}  %t1.s | count 1
-; RUN: grep {sfi	}  %t1.s | count 4
+; RUN: grep {sfi	}  %t1.s | count 6
 ; RUN: cat %t1.s | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
-; Vector shifts are not currently supported in gcc or llvm assembly. These are
-; not tested.
-
 ; Shift left i16 via register, note that the second operand to shl is promoted
 ; to a 32-bit type:
 
@@ -293,3 +290,55 @@ define i128 @test_lshr_i128( i128 %val ) {
 	%rv = lshr i128 %val, 64
 	ret i128 %rv
 }
+
+;Vector shifts
+define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) {
+;CHECK: shl
+;CHECK: bi $lr
+	%rv = shl <2 x i32> %val, %sh
+	ret <2 x i32> %rv
+}
+
+define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) {
+;CHECK: shl
+;CHECK: bi $lr
+	%rv = shl <4 x i32> %val, %sh
+	ret <4 x i32> %rv
+}
+
+define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) {
+;CHECK: shlh
+;CHECK: bi $lr
+	%rv = shl <8 x i16> %val, %sh
+	ret <8 x i16> %rv
+}
+
+define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
+;CHECK: rotm
+;CHECK: bi $lr
+	%rv = lshr <4 x i32> %val, %sh
+	ret <4 x i32> %rv
+}
+
+define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
+;CHECK: sfhi
+;CHECK: rothm
+;CHECK: bi $lr
+	%rv = lshr <8 x i16> %val, %sh
+	ret <8 x i16> %rv
+}
+
+define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
+;CHECK: rotma
+;CHECK: bi $lr
+	%rv = ashr <4 x i32> %val, %sh
+	ret <4 x i32> %rv
+}
+
+define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
+;CHECK: sfhi
+;CHECK: rotmah
+;CHECK: bi $lr
+	%rv = ashr <8 x i16> %val, %sh
+	ret <8 x i16> %rv
+}
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 7e0bf06..6ca5b08 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -171,3 +171,11 @@ define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
 	store <8 x float> %val, <8 x float>* %ptr
 	ret void
 }
+
+define void @store_null_vec( <4 x i32> %val ) {
+; FIXME - this is for some reason compiled into a il+stqd, not a sta. 
+;CHECK: stqd
+;CHECK: bi $lr
+	store <4 x i32> %val, <4 x i32>* null
+	ret void
+}
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
index efd0320..09e15ff 100644
--- a/test/CodeGen/CellSPU/v2f32.ll
+++ b/test/CodeGen/CellSPU/v2f32.ll
@@ -33,6 +33,7 @@ define %vec @test_mul(%vec %param)
  ret %vec %1
 }
 
+; CHECK: test_splat:
 define %vec @test_splat(float %param ) {
 ;CHECK: lqa
 ;CHECK: shufb
@@ -43,16 +44,17 @@ define %vec @test_splat(float %param ) {
 }
 
 define void @test_store(%vec %val, %vec* %ptr){
-
+; CHECK: test_store:
 ;CHECK: stqd 
-  store %vec undef, %vec* null
+  store %vec zeroinitializer, %vec* null
 
-;CHECK: stqd $3, 0(${{.}})
+;CHECK: stqd $3, 0(${{.*}})
 ;CHECK: bi $lr
   store %vec %val, %vec* %ptr
   ret void
 }
 
+; CHECK: test_insert:
 define %vec @test_insert(){
 ;CHECK: cwd
 ;CHECK: shufb $3
@@ -61,6 +63,8 @@ define %vec @test_insert(){
   ret %vec %rv
 }
 
+; CHECK: test_unaligned_store:
+
 define void @test_unaligned_store()  {
 ;CHECK:	cdd
 ;CHECK:	shufb
@@ -68,7 +72,7 @@ define void @test_unaligned_store()  {
   %data = alloca [4 x float], align 16         ; <[4 x float]*> [#uses=1]
   %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1]
   %vptr = bitcast float* %ptr to  <2 x float>* ; <[1 x <2 x float>]*> [#uses=1]
-  store <2 x float> undef, <2 x float>* %vptr
+  store <2 x float> zeroinitializer, <2 x float>* %vptr
   ret void
 }
 
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index 04273988..e7cc7e3 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -38,3 +38,31 @@ unreachable
 declare void @Parse_Vector(double*)
 declare i32 @llvm.objectsize.i32(i8*, i1)
 
+
+; PR9578
+%struct.S0 = type { i32, i8, i32 }
+
+define void @func_82() nounwind optsize {
+entry:
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %entry
+  br i1 undef, label %func_74.exit.for.cond29.thread_crit_edge, label %for.body.i
+
+func_74.exit.for.cond29.thread_crit_edge:         ; preds = %for.body.i
+  %f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1
+  store i8 0, i8* %f13576.pre, align 4, !tbaa !0
+  br label %lbl_468
+
+lbl_468:                                          ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge
+  %f13577.ph = phi i8* [ %f13576.pre, %func_74.exit.for.cond29.thread_crit_edge ], [ %f135.pre, %lbl_468 ]
+  store i8 1, i8* %f13577.ph, align 1
+  %f135.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1
+  br i1 undef, label %lbl_468, label %for.end74
+
+for.end74:                                        ; preds = %lbl_468
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll
index f9c6205..5444f82 100644
--- a/test/CodeGen/MBlaze/fsl.ll
+++ b/test/CodeGen/MBlaze/fsl.ll
@@ -3,7 +3,7 @@
 ; dynamic version of the instructions and that constant values use the
 ; constant version of the instructions.
 ;
-; RUN: llc < %s -march=mblaze | FileCheck %s
+; RUN: llc -O3 < %s -march=mblaze | FileCheck %s
 
 declare i32 @llvm.mblaze.fsl.get(i32 %port)
 declare i32 @llvm.mblaze.fsl.aget(i32 %port)
@@ -55,8 +55,7 @@ declare void @llvm.mblaze.fsl.tnaput(i32 %port)
 declare void @llvm.mblaze.fsl.tncput(i32 %port)
 declare void @llvm.mblaze.fsl.tncaput(i32 %port)
 
-define i32 @fsl_get(i32 %port)
-{
+define void @fsl_get(i32 %port) {
     ; CHECK:        fsl_get:
     %v0  = call i32 @llvm.mblaze.fsl.get(i32 %port)
     ; CHECK:        getd
@@ -122,12 +121,11 @@ define i32 @fsl_get(i32 %port)
     ; CHECK-NEXT:   tnecgetd
     %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port)
     ; CHECK-NEXT:   tnecagetd
-    ret i32 1
+    ret void
     ; CHECK:        rtsd
 }
 
-define i32 @fslc_get()
-{
+define void @fslc_get() {
     ; CHECK:        fslc_get:
     %v0  = call i32 @llvm.mblaze.fsl.get(i32 1)
     ; CHECK:        get
@@ -224,12 +222,11 @@ define i32 @fslc_get()
     %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1)
     ; CHECK-NOT:    tnecagetd
     ; CHECK:        tnecaget
-    ret i32 1
+    ret void
     ; CHECK:        rtsd
 }
 
-define void @putfsl(i32 %value, i32 %port)
-{
+define void @putfsl(i32 %value, i32 %port) {
     ; CHECK:        putfsl:
     call void @llvm.mblaze.fsl.put(i32 %value, i32 %port)
     ; CHECK:        putd
@@ -267,8 +264,7 @@ define void @putfsl(i32 %value, i32 %port)
     ; CHECK:        rtsd
 }
 
-define void @putfsl_const(i32 %value)
-{
+define void @putfsl_const(i32 %value) {
     ; CHECK:        putfsl_const:
     call void @llvm.mblaze.fsl.put(i32 %value, i32 1)
     ; CHECK-NOT:    putd
diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll
index 8973f75..7439d0b 100644
--- a/test/CodeGen/MBlaze/loop.ll
+++ b/test/CodeGen/MBlaze/loop.ll
@@ -29,14 +29,12 @@ loop_inner_finish:
     %inner.5 = add i32 %inner.2, 1
     call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0),
                                  i32 %inner.0, i32 %inner.1, i32 %inner.2 )
-    ; CHECK:        brlid
-    ; CHECK:        addik {{.*, 1}}
 
     %inner.6 = icmp eq i32 %inner.5, 100
-    ; CHECK:        cmp
+    ; CHECK:        cmp [[REG:r[0-9]*]]
 
     br i1 %inner.6, label %loop_inner, label %loop_outer_finish
-    ; CHECK:        {{beq|bne}}
+    ; CHECK:        {{beqid|bneid}} [[REG]]
 
 loop_outer_finish:
     %outer.1 = add i32 %outer.0, 1
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 20bd888..94dfe35 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=mips -o %t
 ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
-; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1
+; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
 
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index ca837ff..519e4b9 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -2,6 +2,10 @@
 ; RUN: grep {c\\..*\\.s} %t | count 3
 ; RUN: grep {bc1\[tf\]} %t | count 3
 
+; FIXME: Disabled because branch instructions are generated where
+; conditional move instructions are expected.
+; REQUIRES: disabled
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
 
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index 52a4b08..e85a749 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,5 +1,9 @@
 ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
 
+; FIXME: Disabled because branch instructions are generated where
+; conditional move instructions are expected.
+; REQUIRES: disabled
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
 
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 7be7974..6dd4af1 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
+; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll
index 891b5d9..e5e2c54 100644
--- a/test/CodeGen/Mips/2010-07-20-Select.ll
+++ b/test/CodeGen/Mips/2010-07-20-Select.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=mips -relocation-model=static -regalloc=basic | FileCheck %s
 ; Fix PR7473
 
 define i32 @main() nounwind readnone {
@@ -9,12 +10,12 @@ entry:
   volatile store i32 0, i32* %c, align 4
   %0 = volatile load i32* %a, align 4             ; <i32> [#uses=1]
   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
-; CHECK: addiu $3, $zero, 0
+; CHECK: addiu $[[R1:[0-9]+]], $zero, 0
   %iftmp.0.0 = select i1 %1, i32 3, i32 0         ; <i32> [#uses=1]
   %2 = volatile load i32* %c, align 4             ; <i32> [#uses=1]
   %3 = icmp eq i32 %2, 0                          ; <i1> [#uses=1]
-; CHECK: addiu $3, $zero, 3
-; CHECK: addu $2, $5, $3
+; CHECK: addiu $[[R1]], $zero, 3
+; CHECK: addu $2, ${{.}}, $[[R1]]
   %iftmp.2.0 = select i1 %3, i32 0, i32 5         ; <i32> [#uses=1]
   %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0         ; <i32> [#uses=1]
   ret i32 %4
diff --git a/test/CodeGen/Mips/addc.ll b/test/CodeGen/Mips/addc.ll
new file mode 100644
index 0000000..e5d05b1
--- /dev/null
+++ b/test/CodeGen/Mips/addc.ll
@@ -0,0 +1,13 @@
+; RUN: llc  < %s -march=mipsel | FileCheck %s 
+; RUN: llc  < %s -march=mips   | FileCheck %s
+
+define void @f(i64 %l, i64* nocapture %p) nounwind {
+entry:
+; CHECK: lui  
+; CHECK: ori
+; CHECK: addu  
+  %add = add i64 %l, 1311768467294899695
+  store i64 %add, i64* %p, align 4 
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
new file mode 100644
index 0000000..8f0bdf2
--- /dev/null
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+define double @foo(double %a, double %b) nounwind readnone {
+entry:
+; CHECK: bc1f $BB0_2
+; CHECK: nop
+; CHECK: # BB#1:    
+
+  %cmp = fcmp ogt double %a, 0.000000e+00
+  br i1 %cmp, label %if.end6, label %if.else
+
+if.else:                                          ; preds = %entry
+  %cmp3 = fcmp ogt double %b, 0.000000e+00
+  br i1 %cmp3, label %if.end6, label %return
+
+if.end6:                                          ; preds = %if.else, %entry
+  %c.0 = phi double [ %a, %entry ], [ 0.000000e+00, %if.else ]
+  %sub = fsub double %b, %c.0
+  %mul = fmul double %sub, 2.000000e+00
+  br label %return
+
+return:                                           ; preds = %if.else, %if.end6
+  %retval.0 = phi double [ %mul, %if.end6 ], [ 0.000000e+00, %if.else ]
+  ret double %retval.0
+}
+
+define void @f1(float %f) nounwind {
+entry:
+; CHECK: bc1t $BB1_2
+; CHECK: nop
+; CHECK: # BB#1:      
+  %cmp = fcmp une float %f, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @abort() noreturn
+  unreachable
+
+if.end:                                           ; preds = %entry
+  tail call void (...)* @f2() nounwind
+  ret void
+}
+
+declare void @abort() noreturn nounwind
+
+declare void @f2(...)
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
new file mode 100644
index 0000000..e9af304
--- /dev/null
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+
+@reg = common global i8* null, align 4
+
+define i8* @dummy(i8* %x) nounwind readnone noinline {
+entry:
+  ret i8* %x
+}
+
+; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp1)($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp1)
+; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp2)($gp)
+; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp2)
+; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp1)
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp1)
+; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp2)
+; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp2)
+define void @f() nounwind {
+entry:
+  %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
+  indirectbr i8* %call, [label %baz, label %foo]
+
+foo:                                              ; preds = %foo, %entry
+  store i8* blockaddress(@f, %foo), i8** @reg, align 4
+  br label %foo
+
+baz:                                              ; preds = %entry
+  store i8* null, i8** @reg, align 4
+  ret void
+}
diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll
new file mode 100644
index 0000000..585bc25
--- /dev/null
+++ b/test/CodeGen/Mips/buildpairextractelementf64.ll
@@ -0,0 +1,23 @@
+; RUN: llc  < %s -march=mipsel | FileCheck %s
+; RUN: llc  < %s -march=mips   | FileCheck %s
+@a = external global i32
+
+define double @f(i32 %a1, double %d) nounwind {
+entry:
+; CHECK: mtc1
+; CHECK: mtc1
+  store i32 %a1, i32* @a, align 4
+  %add = fadd double %d, 2.000000e+00
+  ret double %add
+}
+
+define void @f3(double %d, i32 %a1) nounwind {
+entry:
+; CHECK: mfc1
+; CHECK: mfc1
+  tail call void @f2(i32 %a1, double %d) nounwind
+  ret void
+}
+
+declare void @f2(i32, double)
+
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 7d3e025..8329c89 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,10 +1,11 @@
 ; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=4ke -regalloc=basic < %s | FileCheck %s
 
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; CHECK:  lw  $3, %got(i3)($gp)
-; CHECK:  addiu $5, $gp, %got(i1)
+; CHECK:  lw  ${{[0-9]+}}, %got(i3)($gp)
+; CHECK:  addiu ${{[0-9]+}}, $gp, %got(i1)
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
new file mode 100644
index 0000000..398d1b7
--- /dev/null
+++ b/test/CodeGen/Mips/divrem.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+; CHECK: div $zero,
+define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
+entry:
+  %div = sdiv i32 %a0, %a1
+  ret i32 %div
+}
+
+; CHECK: div $zero,
+define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
+entry:
+  %rem = srem i32 %a0, %a1
+  ret i32 %rem
+}
+
+; CHECK: divu $zero,
+define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
+entry:
+  %div = udiv i32 %a0, %a1
+  ret i32 %div
+}
+
+; CHECK: divu $zero,
+define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
+entry:
+  %rem = urem i32 %a0, %a1
+  ret i32 %rem
+}
+
+; CHECK: div $zero,
+define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+entry:
+  %rem = srem i32 %a0, %a1
+  store i32 %rem, i32* %r, align 4, !tbaa !0
+  %div = sdiv i32 %a0, %a1
+  ret i32 %div
+}
+
+; CHECK: divu $zero,
+define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+entry:
+  %rem = urem i32 %a0, %a1
+  store i32 %rem, i32* %r, align 4, !tbaa !0
+  %div = udiv i32 %a0, %a1
+  ret i32 %div
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
new file mode 100644
index 0000000..0a6478b
--- /dev/null
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -0,0 +1,119 @@
+; RUN: llc  < %s -march=mipsel | FileCheck %s
+
+define void @func0(float %f2, float %f3) nounwind {
+entry:
+; CHECK: c.eq.s
+; CHECK: bc1f
+  %cmp = fcmp oeq float %f2, %f3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+declare void @g0(...)
+
+declare void @g1(...)
+
+define void @func1(float %f2, float %f3) nounwind {
+entry:
+; CHECK: c.olt.s
+; CHECK: bc1f
+  %cmp = fcmp olt float %f2, %f3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+define void @func2(float %f2, float %f3) nounwind {
+entry:
+; CHECK: c.ole.s
+; CHECK: bc1f
+  %cmp = fcmp ugt float %f2, %f3
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+define void @func3(double %f2, double %f3) nounwind {
+entry:
+; CHECK: c.eq.d
+; CHECK: bc1f
+  %cmp = fcmp oeq double %f2, %f3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+define void @func4(double %f2, double %f3) nounwind {
+entry:
+; CHECK: c.olt.d
+; CHECK: bc1f
+  %cmp = fcmp olt double %f2, %f3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+define void @func5(double %f2, double %f3) nounwind {
+entry:
+; CHECK: c.ole.d
+; CHECK: bc1f
+  %cmp = fcmp ugt double %f2, %f3
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g0() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @g1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll
new file mode 100644
index 0000000..c89ffe6
--- /dev/null
+++ b/test/CodeGen/Mips/fpcmp.ll
@@ -0,0 +1,23 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2
+; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1
+
+@g1 = external global i32
+
+define i32 @f(float %f0, float %f1) nounwind {
+entry:
+; CHECK-MIPS32R2: c.olt.s
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS32R2: c.olt.s
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS1: c.olt.s
+; CHECK-MIPS1: bc1t
+; CHECK-MIPS1: c.olt.s
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp olt float %f0, %f1
+  %conv = zext i1 %cmp to i32
+  %tmp2 = load i32* @g1, align 4
+  %add = add nsw i32 %tmp2, %conv
+  store i32 %add, i32* @g1, align 4
+  %cond = select i1 %cmp, i32 10, i32 20
+  ret i32 %cond
+}
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
new file mode 100644
index 0000000..fdfa01a
--- /dev/null
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -0,0 +1,52 @@
+; RUN: llc  < %s -march=mips | FileCheck %s
+
+@caller.sf1 = internal unnamed_addr global void (...)* null, align 4
+@gf1 = external global void (...)*
+@.str = private unnamed_addr constant [3 x i8] c"f2\00"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp)
+; CHECK: addiu $25, $[[R0]], %lo(f2)
+  tail call fastcc void @f2()
+  ret i32 0
+}
+
+define void @caller(i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: lw  $[[R1:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: addiu ${{[0-9]+}}, $[[R1]], %lo(caller.sf1)
+  %tobool = icmp eq i32 %a1, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %tmp1 = load void (...)** @caller.sf1, align 4
+  tail call void (...)* %tmp1() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+; CHECK: lw  $[[R2:[0-9]+]], %got(sf2)($gp)
+; CHECK: lw  $[[R3:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
+; CHECK: addiu ${{[0-9]+}}, $[[R3]], %lo(caller.sf1)
+  %tobool3 = icmp ne i32 %a0, 0
+  %tmp4 = load void (...)** @gf1, align 4
+  %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
+  store void (...)* %cond, void (...)** @caller.sf1, align 4
+  ret void
+}
+
+define internal void @sf2() nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define internal fastcc void @f2() nounwind noinline {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/largeimm1.ll b/test/CodeGen/Mips/largeimm1.ll
new file mode 100644
index 0000000..d65cc02
--- /dev/null
+++ b/test/CodeGen/Mips/largeimm1.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+; CHECK: lui $at, 49152
+; CHECK: lui $at, 16384
+define void @f() nounwind {
+entry:
+  %a1 = alloca [1073741824 x i8], align 1
+  %arrayidx = getelementptr inbounds [1073741824 x i8]* %a1, i32 0, i32 1048676
+  call void @f2(i8* %arrayidx) nounwind
+  ret void
+}
+
+declare void @f2(i8*)
diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll
index b6df62b..3974cd4 100644
--- a/test/CodeGen/Mips/o32_cc.ll
+++ b/test/CodeGen/Mips/o32_cc.ll
@@ -61,8 +61,8 @@ entry:
 declare void @f4(i32, i32, i32, i32)
 
 ; $f12, $6, stack
-; CHECK: sw  $2, 16($sp)
-; CHECK: sw  $zero, 20($sp)
+; CHECK: sw
+; CHECK: sw
 ; CHECK: ldc1 $f12, %lo
 ; CHECK: addiu $6, $zero, 23
 define void @testlowercall5() nounwind {
@@ -98,8 +98,8 @@ entry:
 declare void @f7(float, i32, i32)
 
 ; $4, $5, $6, stack
-; CHECK: sw  $2, 16($sp)
-; CHECK: sw  $zero, 20($sp)
+; CHECK: sw
+; CHECK: sw
 ; CHECK: addiu $4, $zero, 22
 ; CHECK: addiu $5, $zero, 53
 ; CHECK: addiu $6, $zero, 44
@@ -115,7 +115,7 @@ declare void @f8(i32, i32, i32, double)
 ; CHECK: addiu $4, $zero, 32
 ; CHECK: addiu $5, $zero, 63
 ; CHECK: addiu $6, $zero, 54
-; CHECK: ori $7, $2, 0
+; CHECK: ori $7
 define void @testlowercall9() nounwind {
 entry:
   tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind
@@ -128,7 +128,7 @@ declare void @f9(i32, i32, i32, float)
 ; CHECK: addiu $4, $zero, 42
 ; CHECK: addiu $5, $zero, 73
 ; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7, $2, 0
+; CHECK: ori $7
 define void @testlowercall10() nounwind {
 entry:
   tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind
@@ -140,7 +140,7 @@ declare void @f10(i32, i32, double)
 ; $4, ($6, $7)
 ; CHECK: addiu $4, $zero, 52
 ; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7, $2, 0
+; CHECK: ori $7
 define void @testlowercall11() nounwind {
 entry:
   tail call void @f11(i32 52, double 1.600000e+01) nounwind
@@ -152,8 +152,8 @@ declare void @f11(i32, double)
 ; $f12, $f14, $6, $7
 ; CHECK: lwc1 $f12, %lo
 ; CHECK: lwc1 $f14, %lo
-; CHECK: ori $6, $4, 0
-; CHECK: ori $7, $5, 0
+; CHECK: ori $6
+; CHECK: ori $7
 define void @testlowercall12() nounwind {
 entry:
   tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
@@ -165,7 +165,7 @@ declare void @f12(float, float, float, float)
 ; $f12, $5, $6, $7
 ; CHECK: lwc1 $f12, %lo
 ; CHECK: addiu $5, $zero, 83
-; CHECK: ori $6, $3, 0
+; CHECK: ori $6
 ; CHECK: addiu $7, $zero, 25
 define void @testlowercall13() nounwind {
 entry:
@@ -179,7 +179,7 @@ declare void @f13(float, i32, float, i32)
 ; $f12, $f14, $7
 ; CHECK: ldc1 $f12, %lo
 ; CHECK: lwc1 $f14, %lo
-; CHECK: ori $7, $4, 0
+; CHECK: ori $7
 define void @testlowercall14() nounwind {
 entry:
   tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind
@@ -192,7 +192,7 @@ declare void @f14(double, float, float)
 ; CHECK: lwc1 $f12, %lo
 ; CHECK: lwc1 $f14, %lo
 ; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7, $4, 32768
+; CHECK: ori $7
 define void @testlowercall15() nounwind {
 entry:
   tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind
@@ -203,9 +203,9 @@ declare void @f15(float, float, double)
 
 ; $4, $5, $6, $7
 ; CHECK: addiu $4, $zero, 62
-; CHECK: ori $5, $2, 0
+; CHECK: ori $5
 ; CHECK: addiu $6, $zero, 64
-; CHECK: ori $7, $3, 0
+; CHECK: ori $7
 define void @testlowercall16() nounwind {
 entry:
   tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind
@@ -216,7 +216,7 @@ declare void @f16(i32, float, i32, float)
 
 ; $4, $5, $6, $7
 ; CHECK: addiu $4, $zero, 72
-; CHECK: ori $5, $2, 0
+; CHECK: ori $5
 ; CHECK: addiu $6, $zero, 74
 ; CHECK: addiu $7, $zero, 35
 define void @testlowercall17() nounwind {
@@ -230,7 +230,7 @@ declare void @f17(i32, float, i32, i32)
 ; $4, $5, $6, $7
 ; CHECK: addiu $4, $zero, 82
 ; CHECK: addiu $5, $zero, 93
-; CHECK: ori $6, $2, 0
+; CHECK: ori $6
 ; CHECK: addiu $7, $zero, 45
 define void @testlowercall18() nounwind {
 entry:
@@ -242,11 +242,11 @@ declare void @f18(i32, i32, float, i32)
 
 
 ; $4, ($6, $7), stack
-; CHECK: sw  $2, 16($sp)
-; CHECK: sw  $zero, 20($sp)
+; CHECK: sw
+; CHECK: sw
 ; CHECK: addiu $4, $zero, 92
 ; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7, $3, 0
+; CHECK: ori $7
 define void @testlowercall20() nounwind {
 entry:
   tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind
@@ -270,7 +270,7 @@ declare void @f21(float, i32)
 ; CHECK: lwc1 $f12, %lo
 ; CHECK: addiu $5, $zero, 113
 ; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7, $3, 32768
+; CHECK: ori $7
 define void @testlowercall22() nounwind {
 entry:
   tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind
@@ -291,8 +291,8 @@ entry:
 declare void @f23(double, i32)
 
 ; $f12,$6, stack
-; CHECK: sw  $2, 16($sp)
-; CHECK: sw  $zero, 20($sp)
+; CHECK: sw
+; CHECK: sw
 ; CHECK: ldc1 $f12, %lo
 ; CHECK: addiu $6, $zero, 133
 define void @testlowercall24() nounwind {
@@ -306,15 +306,15 @@ declare void @f24(double, i32, double)
 ; CHECK: lwc1 $f12, %lo
 ; lwc1 $f12, %lo
 ; CHECK: lwc1 $f14, %lo
-; CHECK: ori $6, $4, 0
-; CHECK: ori $7, $5, 0
+; CHECK: ori $6
+; CHECK: ori $7
 ; CHECK: lwc1 $f12, %lo
 ; CHECK: addiu $5, $zero, 83
-; CHECK: ori $6, $3, 0
+; CHECK: ori $6
 ; CHECK: addiu $7, $zero, 25
 ; CHECK: addiu $4, $zero, 82
 ; CHECK: addiu $5, $zero, 93
-; CHECK: ori $6, $2, 0
+; CHECK: ori $6
 ; CHECK: addiu $7, $zero, 45
 define void @testlowercall25() nounwind {
 entry:
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
new file mode 100644
index 0000000..1f71ed2
--- /dev/null
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -0,0 +1,278 @@
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
+
+
+; All test functions do the same thing - they return the first variable
+; argument.
+
+; All CHECK's do the same thing - they check whether variable arguments from
+; registers are placed on correct stack locations, and whether the first
+; variable argument is returned from the correct stack location.
+
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; return int
+define i32 @va1(i32 %a, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %b = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  store i32 %0, i32* %b, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load i32* %b, align 4
+  ret i32 %tmp
+
+; CHECK: va1:
+; CHECK: addiu   $sp, $sp, -32
+; CHECK: sw      $7, 44($sp)
+; CHECK: sw      $6, 40($sp)
+; CHECK: sw      $5, 36($sp)
+; CHECK: lw      $2, 36($sp)
+}
+
+; check whether the variable double argument will be accessed from the 8-byte
+; aligned location (i.e. whether the address is computed by adding 7 and
+; clearing lower 3 bits)
+define double @va2(i32 %a, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %b = alloca double, align 8
+  store i32 %a, i32* %a.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, double
+  store double %0, double* %b, align 8
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load double* %b, align 8
+  ret double %tmp
+
+; CHECK: va2:
+; CHECK: addiu   $sp, $sp, -40
+; CHECK: sw      $7, 52($sp)
+; CHECK: sw      $6, 48($sp)
+; CHECK: sw      $5, 44($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 44
+; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
+; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
+; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
+; CHECK: ldc1    $f0, 0($[[R3]])
+}
+
+; int
+define i32 @va3(double %a, ...) nounwind {
+entry:
+  %a.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %b = alloca i32, align 4
+  store double %a, double* %a.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  store i32 %0, i32* %b, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load i32* %b, align 4
+  ret i32 %tmp
+
+; CHECK: va3:
+; CHECK: addiu   $sp, $sp, -40
+; CHECK: sw      $7, 52($sp)
+; CHECK: sw      $6, 48($sp)
+; CHECK: lw      $2, 48($sp)
+}
+
+; double
+define double @va4(double %a, ...) nounwind {
+entry:
+  %a.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %b = alloca double, align 8
+  store double %a, double* %a.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, double
+  store double %0, double* %b, align 8
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load double* %b, align 8
+  ret double %tmp
+
+; CHECK: va4:
+; CHECK: addiu   $sp, $sp, -48
+; CHECK: sw      $7, 60($sp)
+; CHECK: sw      $6, 56($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 56
+; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
+; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
+; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
+; CHECK: ldc1    $f0, 0($[[R3]])
+}
+
+; int
+define i32 @va5(i32 %a, i32 %b, i32 %c, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %d = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  store i32 %0, i32* %d, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load i32* %d, align 4
+  ret i32 %tmp
+
+; CHECK: va5:
+; CHECK: addiu   $sp, $sp, -40
+; CHECK: sw      $7, 52($sp)
+; CHECK: lw      $2, 52($sp)
+}
+
+; double
+define double @va6(i32 %a, i32 %b, i32 %c, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %d = alloca double, align 8
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, double
+  store double %0, double* %d, align 8
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load double* %d, align 8
+  ret double %tmp
+
+; CHECK: va6:
+; CHECK: addiu   $sp, $sp, -48
+; CHECK: sw      $7, 60($sp)
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 60
+; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
+; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
+; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
+; CHECK: ldc1    $f0, 0($[[R3]])
+}
+
+; int
+define i32 @va7(i32 %a, double %b, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %c = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store double %b, double* %b.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  store i32 %0, i32* %c, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load i32* %c, align 4
+  ret i32 %tmp
+
+; CHECK: va7:
+; CHECK: addiu   $sp, $sp, -40
+; CHECK: lw      $2, 56($sp)
+}
+
+; double
+define double @va8(i32 %a, double %b, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %c = alloca double, align 8
+  store i32 %a, i32* %a.addr, align 4
+  store double %b, double* %b.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, double
+  store double %0, double* %c, align 8
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load double* %c, align 8
+  ret double %tmp
+
+; CHECK: va8:
+; CHECK: addiu   $sp, $sp, -48
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 64
+; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
+; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
+; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
+; CHECK: ldc1    $f0, 0($[[R3]])
+}
+
+; int
+define i32 @va9(double %a, double %b, i32 %c, ...) nounwind {
+entry:
+  %a.addr = alloca double, align 8
+  %b.addr = alloca double, align 8
+  %c.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %d = alloca i32, align 4
+  store double %a, double* %a.addr, align 8
+  store double %b, double* %b.addr, align 8
+  store i32 %c, i32* %c.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  store i32 %0, i32* %d, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load i32* %d, align 4
+  ret i32 %tmp
+
+; CHECK: va9:
+; CHECK: addiu   $sp, $sp, -56
+; CHECK: lw      $2, 76($sp)
+}
+
+; double
+define double @va10(double %a, double %b, i32 %c, ...) nounwind {
+entry:
+  %a.addr = alloca double, align 8
+  %b.addr = alloca double, align 8
+  %c.addr = alloca i32, align 4
+  %ap = alloca i8*, align 4
+  %d = alloca double, align 8
+  store double %a, double* %a.addr, align 8
+  store double %b, double* %b.addr, align 8
+  store i32 %c, i32* %c.addr, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, double
+  store double %0, double* %d, align 8
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_end(i8* %ap2)
+  %tmp = load double* %d, align 8
+  ret double %tmp
+
+; CHECK: va10:
+; CHECK: addiu   $sp, $sp, -56
+; CHECK: addiu   $[[R0:[0-9]+]], $sp, 76
+; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
+; CHECK: addiu   $[[R2:[0-9]+]], $zero, -8
+; CHECK: and     $[[R3:[0-9]+]], $[[R1]], $[[R2]]
+; CHECK: ldc1    $f0, 0($[[R3]])
+}
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
new file mode 100644
index 0000000..c83fa3e
--- /dev/null
+++ b/test/CodeGen/Mips/select.ll
@@ -0,0 +1,196 @@
+; RUN: llc  < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2
+; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1
+
+@d2 = external global double
+@d3 = external global double
+
+define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: movn
+; CHECK-MIPS1: beq
+  %tobool = icmp ne i32 %s, 0
+  %cond = select i1 %tobool, i32 %f1, i32 %f0
+  ret i32 %cond
+}
+
+define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: movn.s
+; CHECK-MIPS1: beq
+  %tobool = icmp ne i32 %s, 0
+  %cond = select i1 %tobool, float %f0, float %f1
+  ret float %cond
+}
+
+define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: movn.d
+; CHECK-MIPS1: beq
+  %tobool = icmp ne i32 %s, 0
+  %cond = select i1 %tobool, double %f0, double %f1
+  ret double %cond
+}
+
+define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.eq.s
+; CHECK-MIPS32R2: movt.s
+; CHECK-MIPS1: c.eq.s
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp oeq float %f2, %f3
+  %cond = select i1 %cmp, float %f0, float %f1
+  ret float %cond
+}
+
+define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.olt.s
+; CHECK-MIPS32R2: movt.s
+; CHECK-MIPS1: c.olt.s
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp olt float %f2, %f3
+  %cond = select i1 %cmp, float %f0, float %f1
+  ret float %cond
+}
+
+define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.ule.s
+; CHECK-MIPS32R2: movf.s
+; CHECK-MIPS1: c.ule.s
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp ogt float %f2, %f3
+  %cond = select i1 %cmp, float %f0, float %f1
+  ret float %cond
+}
+
+define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.ule.s
+; CHECK-MIPS32R2: movf.d
+; CHECK-MIPS1: c.ule.s
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp ogt float %f2, %f3
+  %cond = select i1 %cmp, double %f0, double %f1
+  ret double %cond
+}
+
+define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.eq.d
+; CHECK-MIPS32R2: movt.d
+; CHECK-MIPS1: c.eq.d
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp oeq double %f2, %f3
+  %cond = select i1 %cmp, double %f0, double %f1
+  ret double %cond
+}
+
+define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.olt.d
+; CHECK-MIPS32R2: movt.d
+; CHECK-MIPS1: c.olt.d
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp olt double %f2, %f3
+  %cond = select i1 %cmp, double %f0, double %f1
+  ret double %cond
+}
+
+define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.ule.d
+; CHECK-MIPS32R2: movf.d
+; CHECK-MIPS1: c.ule.d
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp ogt double %f2, %f3
+  %cond = select i1 %cmp, double %f0, double %f1
+  ret double %cond
+}
+
+define float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.ule.d
+; CHECK-MIPS32R2: movf.s
+; CHECK-MIPS1: c.ule.d
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp ogt double %f2, %f3
+  %cond = select i1 %cmp, float %f0, float %f1
+  ret float %cond
+}
+
+define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.eq.s
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS1: c.eq.s
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp oeq float %f2, %f3
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.olt.s
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS1: c.olt.s
+; CHECK-MIPS1: bc1f
+  %cmp = fcmp olt float %f2, %f3
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+entry:
+; CHECK-MIPS32R2: c.ule.s
+; CHECK-MIPS32R2: movf
+; CHECK-MIPS1: c.ule.s
+; CHECK-MIPS1: bc1t
+  %cmp = fcmp ogt float %f2, %f3
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly {
+entry:
+; CHECK-MIPS32R2: c.eq.d
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS1: c.eq.d
+; CHECK-MIPS1: bc1f
+  %tmp = load double* @d2, align 8, !tbaa !0
+  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %cmp = fcmp oeq double %tmp, %tmp1
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly {
+entry:
+; CHECK-MIPS32R2: c.olt.d
+; CHECK-MIPS32R2: movt
+; CHECK-MIPS1: c.olt.d
+; CHECK-MIPS1: bc1f
+  %tmp = load double* @d2, align 8, !tbaa !0
+  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %cmp = fcmp olt double %tmp, %tmp1
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly {
+entry:
+; CHECK-MIPS32R2: c.ule.d
+; CHECK-MIPS32R2: movf
+; CHECK-MIPS1: c.ule.d
+; CHECK-MIPS1: bc1t
+  %tmp = load double* @d2, align 8, !tbaa !0
+  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %cmp = fcmp ogt double %tmp, %tmp1
+  %cond = select i1 %cmp, i32 %f0, i32 %f1
+  ret i32 %cond
+}
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
index 1259d03..235b00e 100644
--- a/test/CodeGen/PTX/add.ll
+++ b/test/CodeGen/PTX/add.ll
@@ -1,15 +1,71 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: add.s32 r0, r1, r2;
+define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
+; CHECK: add.u16 rh0, rh1, rh2;
+; CHECK-NEXT: ret;
+	%z = add i16 %x, %y
+	ret i16 %z
+}
+
+define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
+; CHECK: add.u32 r0, r1, r2;
+; CHECK-NEXT: ret;
 	%z = add i32 %x, %y
-; CHECK: ret;
 	ret i32 %z
 }
 
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: add.s32 r0, r1, 1;
+define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
+; CHECK: add.u64 rd0, rd1, rd2;
+; CHECK-NEXT: ret;
+	%z = add i64 %x, %y
+	ret i64 %z
+}
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: add.f32 f0, f1, f2
+; CHECK-NEXT: ret;
+  %z = fadd float %x, %y
+  ret float %z
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: add.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+  %z = fadd double %x, %y
+  ret double %z
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: add.u16 rh0, rh1, 1;
+; CHECK-NEXT: ret;
+	%z = add i16 %x, 1
+	ret i16 %z
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: add.u32 r0, r1, 1;
+; CHECK-NEXT: ret;
 	%z = add i32 %x, 1
-; CHECK: ret;
 	ret i32 %z
 }
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: add.u64 rd0, rd1, 1;
+; CHECK-NEXT: ret;
+	%z = add i64 %x, 1
+	ret i64 %z
+}
+
+define ptx_device float @t2_f32(float %x) {
+; CHECK: add.f32 f0, f1, 0F3F800000;
+; CHECK-NEXT: ret;
+  %z = fadd float %x, 1.0
+  ret float %z
+}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000;
+; CHECK-NEXT: ret;
+  %z = fadd double %x, 1.0
+  ret double %z
+}
diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll
new file mode 100644
index 0000000..dbc77e5
--- /dev/null
+++ b/test/CodeGen/PTX/bitwise.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+; preds
+
+define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
+; CHECK: and.pred p0, p1, p2
+  %c = and i1 %x, %y
+  %d = zext i1 %c to i32 
+  ret i32 %d
+}
+
+define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
+; CHECK: or.pred p0, p1, p2
+  %a = or i1 %x, %y
+  %b = zext i1 %a to i32 
+  ret i32 %b
+}
+
+define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
+; CHECK: xor.pred p0, p1, p2
+  %a = xor i1 %x, %y
+  %b = zext i1 %a to i32 
+  ret i32 %b
+}
diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll
new file mode 100644
index 0000000..49383eb
--- /dev/null
+++ b/test/CodeGen/PTX/bra.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device void @test_bra_direct() {
+; CHECK: bra $L__BB0_1;
+entry:
+	br label %loop
+loop:
+	br label %loop
+}
+
+define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
+entry:
+; CHECK: setp.le.u32 p0, r1, r2
+	%p = icmp ugt i32 %x, %y
+; CHECK-NEXT: @p0 bra
+; CHECK-NOT: bra
+	br i1 %p, label %clause.if, label %clause.else
+clause.if:
+; CHECK: mov.u32 r0, r1
+	ret i32 %x
+clause.else:
+; CHECK: mov.u32 r0, r2
+	ret i32 %y
+}
diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll
index 4071bab..7816c80 100644
--- a/test/CodeGen/PTX/exit.ll
+++ b/test/CodeGen/PTX/exit.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
 define ptx_kernel void @t1() {
 ; CHECK: exit;
diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll
new file mode 100644
index 0000000..121360c
--- /dev/null
+++ b/test/CodeGen/PTX/fdiv-sm10.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+	%a = fdiv float %x, %y
+	ret float %a
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: div.f64 fd0, fd1, fd2;
+; CHECK-NEXT: ret;
+	%a = fdiv double %x, %y
+	ret double %a
+}
diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll
new file mode 100644
index 0000000..0ec7bae
--- /dev/null
+++ b/test/CodeGen/PTX/fdiv-sm13.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+	%a = fdiv float %x, %y
+	ret float %a
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: div.rn.f64 fd0, fd1, fd2;
+; CHECK-NEXT: ret;
+	%a = fdiv double %x, %y
+	ret double %a
+}
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll
new file mode 100644
index 0000000..cea4182
--- /dev/null
+++ b/test/CodeGen/PTX/intrinsic.ll
@@ -0,0 +1,281 @@
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
+
+define ptx_device i32 @test_tid_x() {
+; CHECK: mov.u32 r0, %tid.x;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.tid.x()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_tid_y() {
+; CHECK: mov.u32 r0, %tid.y;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.tid.y()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_tid_z() {
+; CHECK: mov.u32 r0, %tid.z;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.tid.z()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_tid_w() {
+; CHECK: mov.u32 r0, %tid.w;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.tid.w()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_x() {
+; CHECK: mov.u32 r0, %ntid.x;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ntid.x()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_y() {
+; CHECK: mov.u32 r0, %ntid.y;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ntid.y()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_z() {
+; CHECK: mov.u32 r0, %ntid.z;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ntid.z()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ntid_w() {
+; CHECK: mov.u32 r0, %ntid.w;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ntid.w()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_laneid() {
+; CHECK: mov.u32 r0, %laneid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.laneid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_warpid() {
+; CHECK: mov.u32 r0, %warpid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.warpid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nwarpid() {
+; CHECK: mov.u32 r0, %nwarpid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nwarpid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_x() {
+; CHECK: mov.u32 r0, %ctaid.x;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ctaid.x()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_y() {
+; CHECK: mov.u32 r0, %ctaid.y;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ctaid.y()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_z() {
+; CHECK: mov.u32 r0, %ctaid.z;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ctaid.z()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_ctaid_w() {
+; CHECK: mov.u32 r0, %ctaid.w;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.ctaid.w()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_x() {
+; CHECK: mov.u32 r0, %nctaid.x;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nctaid.x()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_y() {
+; CHECK: mov.u32 r0, %nctaid.y;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nctaid.y()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_z() {
+; CHECK: mov.u32 r0, %nctaid.z;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nctaid.z()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nctaid_w() {
+; CHECK: mov.u32 r0, %nctaid.w;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nctaid.w()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_smid() {
+; CHECK: mov.u32 r0, %smid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.smid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_nsmid() {
+; CHECK: mov.u32 r0, %nsmid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.nsmid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_gridid() {
+; CHECK: mov.u32 r0, %gridid;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.gridid()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_eq() {
+; CHECK: mov.u32 r0, %lanemask_eq;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.lanemask.eq()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_le() {
+; CHECK: mov.u32 r0, %lanemask_le;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.lanemask.le()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_lt() {
+; CHECK: mov.u32 r0, %lanemask_lt;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.lanemask.lt()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_ge() {
+; CHECK: mov.u32 r0, %lanemask_ge;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.lanemask.ge()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_lanemask_gt() {
+; CHECK: mov.u32 r0, %lanemask_gt;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.lanemask.gt()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_clock() {
+; CHECK: mov.u32 r0, %clock;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.clock()
+	ret i32 %x
+}
+
+define ptx_device i64 @test_clock64() {
+; CHECK: mov.u64 rd0, %clock64;
+; CHECK-NEXT: ret;
+	%x = call i64 @llvm.ptx.read.clock64()
+	ret i64 %x
+}
+
+define ptx_device i32 @test_pm0() {
+; CHECK: mov.u32 r0, %pm0;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.pm0()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_pm1() {
+; CHECK: mov.u32 r0, %pm1;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.pm1()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_pm2() {
+; CHECK: mov.u32 r0, %pm2;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.pm2()
+	ret i32 %x
+}
+
+define ptx_device i32 @test_pm3() {
+; CHECK: mov.u32 r0, %pm3;
+; CHECK-NEXT: ret;
+	%x = call i32 @llvm.ptx.read.pm3()
+	ret i32 %x
+}
+
+define ptx_device void @test_bar_sync() {
+; CHECK: bar.sync 0
+; CHECK-NEXT: ret;
+	call void @llvm.ptx.bar.sync(i32 0)
+	ret void
+}
+
+declare i32 @llvm.ptx.read.tid.x()
+declare i32 @llvm.ptx.read.tid.y()
+declare i32 @llvm.ptx.read.tid.z()
+declare i32 @llvm.ptx.read.tid.w()
+declare i32 @llvm.ptx.read.ntid.x()
+declare i32 @llvm.ptx.read.ntid.y()
+declare i32 @llvm.ptx.read.ntid.z()
+declare i32 @llvm.ptx.read.ntid.w()
+
+declare i32 @llvm.ptx.read.laneid()
+declare i32 @llvm.ptx.read.warpid()
+declare i32 @llvm.ptx.read.nwarpid()
+
+declare i32 @llvm.ptx.read.ctaid.x()
+declare i32 @llvm.ptx.read.ctaid.y()
+declare i32 @llvm.ptx.read.ctaid.z()
+declare i32 @llvm.ptx.read.ctaid.w()
+declare i32 @llvm.ptx.read.nctaid.x()
+declare i32 @llvm.ptx.read.nctaid.y()
+declare i32 @llvm.ptx.read.nctaid.z()
+declare i32 @llvm.ptx.read.nctaid.w()
+
+declare i32 @llvm.ptx.read.smid()
+declare i32 @llvm.ptx.read.nsmid()
+declare i32 @llvm.ptx.read.gridid()
+
+declare i32 @llvm.ptx.read.lanemask.eq()
+declare i32 @llvm.ptx.read.lanemask.le()
+declare i32 @llvm.ptx.read.lanemask.lt()
+declare i32 @llvm.ptx.read.lanemask.ge()
+declare i32 @llvm.ptx.read.lanemask.gt()
+
+declare i32 @llvm.ptx.read.clock()
+declare i64 @llvm.ptx.read.clock64()
+
+declare i32 @llvm.ptx.read.pm0()
+declare i32 @llvm.ptx.read.pm1()
+declare i32 @llvm.ptx.read.pm2()
+declare i32 @llvm.ptx.read.pm3()
+
+declare void @llvm.ptx.bar.sync(i32 %i)
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 836c4d4..377a95a 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -1,78 +1,447 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .s32 array[];
-@array = external global [10 x i32]
+;CHECK: .extern .global .b8 array_i16[20];
+@array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .s32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x i32]
+;CHECK: .extern .const .b8 array_constant_i16[20];
+@array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .local .s32 array_local[];
-@array_local = external addrspace(2) global [10 x i32]
+;CHECK: .extern .local .b8 array_local_i16[20];
+@array_local_i16 = external addrspace(2) global [10 x i16]
 
-;CHECK: .extern .shared .s32 array_shared[];
-@array_shared = external addrspace(4) global [10 x i32]
+;CHECK: .extern .shared .b8 array_shared_i16[20];
+@array_shared_i16 = external addrspace(4) global [10 x i16]
 
-define ptx_device i32 @t1(i32* %p) {
+;CHECK: .extern .global .b8 array_i32[40];
+@array_i32 = external global [10 x i32]
+
+;CHECK: .extern .const .b8 array_constant_i32[40];
+@array_constant_i32 = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .b8 array_local_i32[40];
+@array_local_i32 = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .b8 array_shared_i32[40];
+@array_shared_i32 = external addrspace(4) global [10 x i32]
+
+;CHECK: .extern .global .b8 array_i64[80];
+@array_i64 = external global [10 x i64]
+
+;CHECK: .extern .const .b8 array_constant_i64[80];
+@array_constant_i64 = external addrspace(1) constant [10 x i64]
+
+;CHECK: .extern .local .b8 array_local_i64[80];
+@array_local_i64 = external addrspace(2) global [10 x i64]
+
+;CHECK: .extern .shared .b8 array_shared_i64[80];
+@array_shared_i64 = external addrspace(4) global [10 x i64]
+
+;CHECK: .extern .global .b8 array_float[40];
+@array_float = external global [10 x float]
+
+;CHECK: .extern .const .b8 array_constant_float[40];
+@array_constant_float = external addrspace(1) constant [10 x float]
+
+;CHECK: .extern .local .b8 array_local_float[40];
+@array_local_float = external addrspace(2) global [10 x float]
+
+;CHECK: .extern .shared .b8 array_shared_float[40];
+@array_shared_float = external addrspace(4) global [10 x float]
+
+;CHECK: .extern .global .b8 array_double[80];
+@array_double = external global [10 x double]
+
+;CHECK: .extern .const .b8 array_constant_double[80];
+@array_constant_double = external addrspace(1) constant [10 x double]
+
+;CHECK: .extern .local .b8 array_local_double[80];
+@array_local_double = external addrspace(2) global [10 x double]
+
+;CHECK: .extern .shared .b8 array_shared_double[80];
+@array_shared_double = external addrspace(4) global [10 x double]
+
+
+define ptx_device i16 @t1_u16(i16* %p) {
 entry:
-;CHECK: ld.global.s32 r0, [r1];
+;CHECK: ld.global.u16 rh0, [r1];
+;CHECK-NEXT: ret;
+  %x = load i16* %p
+  ret i16 %x
+}
+
+define ptx_device i32 @t1_u32(i32* %p) {
+entry:
+;CHECK: ld.global.u32 r0, [r1];
+;CHECK-NEXT: ret;
   %x = load i32* %p
   ret i32 %x
 }
 
-define ptx_device i32 @t2(i32* %p) {
+define ptx_device i64 @t1_u64(i64* %p) {
+entry:
+;CHECK: ld.global.u64 rd0, [r1];
+;CHECK-NEXT: ret;
+  %x = load i64* %p
+  ret i64 %x
+}
+
+define ptx_device float @t1_f32(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1];
+;CHECK-NEXT: ret;
+  %x = load float* %p
+  ret float %x
+}
+
+define ptx_device double @t1_f64(double* %p) {
+entry:
+;CHECK: ld.global.f64 fd0, [r1];
+;CHECK-NEXT: ret;
+  %x = load double* %p
+  ret double %x
+}
+
+define ptx_device i16 @t2_u16(i16* %p) {
 entry:
-;CHECK: ld.global.s32 r0, [r1+4];
+;CHECK: ld.global.u16 rh0, [r1+2];
+;CHECK-NEXT: ret;
+  %i = getelementptr i16* %p, i32 1
+  %x = load i16* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t2_u32(i32* %p) {
+entry:
+;CHECK: ld.global.u32 r0, [r1+4];
+;CHECK-NEXT: ret;
   %i = getelementptr i32* %p, i32 1
   %x = load i32* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t3(i32* %p, i32 %q) {
+define ptx_device i64 @t2_u64(i64* %p) {
+entry:
+;CHECK: ld.global.u64 rd0, [r1+8];
+;CHECK-NEXT: ret;
+  %i = getelementptr i64* %p, i32 1
+  %x = load i64* %i
+  ret i64 %x
+}
+
+define ptx_device float @t2_f32(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1+4];
+;CHECK-NEXT: ret;
+  %i = getelementptr float* %p, i32 1
+  %x = load float* %i
+  ret float %x
+}
+
+define ptx_device double @t2_f64(double* %p) {
+entry:
+;CHECK: ld.global.f64 fd0, [r1+8];
+;CHECK-NEXT: ret;
+  %i = getelementptr double* %p, i32 1
+  %x = load double* %i
+  ret double %x
+}
+
+define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 1;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u16 rh0, [r0];
+  %i = getelementptr i16* %p, i32 %q
+  %x = load i16* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
 entry:
 ;CHECK: shl.b32 r0, r2, 2;
-;CHECK: add.s32 r0, r1, r0;
-;CHECK: ld.global.s32 r0, [r0];
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u32 r0, [r0];
   %i = getelementptr i32* %p, i32 %q
   %x = load i32* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t4_global() {
+define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u64 rd0, [r0];
+  %i = getelementptr i64* %p, i32 %q
+  %x = load i64* %i
+  ret i64 %x
+}
+
+define ptx_device float @t3_f32(float* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.f32 f0, [r0];
+  %i = getelementptr float* %p, i32 %q
+  %x = load float* %i
+  ret float %x
+}
+
+define ptx_device double @t3_f64(double* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.f64 fd0, [r0];
+  %i = getelementptr double* %p, i32 %q
+  %x = load double* %i
+  ret double %x
+}
+
+define ptx_device i16 @t4_global_u16() {
+entry:
+;CHECK: mov.u32 r0, array_i16;
+;CHECK-NEXT: ld.global.u16 rh0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
+  %x = load i16* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t4_global_u32() {
 entry:
-;CHECK: ld.global.s32 r0, [array];
-  %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+;CHECK: mov.u32 r0, array_i32;
+;CHECK-NEXT: ld.global.u32 r0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
   %x = load i32* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t4_const() {
+define ptx_device i64 @t4_global_u64() {
 entry:
-;CHECK: ld.const.s32 r0, [array_constant];
-  %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
+;CHECK: mov.u32 r0, array_i64;
+;CHECK-NEXT: ld.global.u64 rd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
+  %x = load i64* %i
+  ret i64 %x
+}
+
+define ptx_device float @t4_global_f32() {
+entry:
+;CHECK: mov.u32 r0, array_float;
+;CHECK-NEXT: ld.global.f32 f0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
+  %x = load float* %i
+  ret float %x
+}
+
+define ptx_device double @t4_global_f64() {
+entry:
+;CHECK: mov.u32 r0, array_double;
+;CHECK-NEXT: ld.global.f64 fd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
+  %x = load double* %i
+  ret double %x
+}
+
+define ptx_device i16 @t4_const_u16() {
+entry:
+;CHECK: mov.u32 r0, array_constant_i16;
+;CHECK-NEXT: ld.const.u16 rh0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
+  %x = load i16 addrspace(1)* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t4_const_u32() {
+entry:
+;CHECK: mov.u32 r0, array_constant_i32;
+;CHECK-NEXT: ld.const.u32 r0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
   %x = load i32 addrspace(1)* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t4_local() {
+define ptx_device i64 @t4_const_u64() {
+entry:
+;CHECK: mov.u32 r0, array_constant_i64;
+;CHECK-NEXT: ld.const.u64 rd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
+  %x = load i64 addrspace(1)* %i
+  ret i64 %x
+}
+
+define ptx_device float @t4_const_f32() {
 entry:
-;CHECK: ld.local.s32 r0, [array_local];
-  %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+;CHECK: mov.u32 r0, array_constant_float;
+;CHECK-NEXT: ld.const.f32 f0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
+  %x = load float addrspace(1)* %i
+  ret float %x
+}
+
+define ptx_device double @t4_const_f64() {
+entry:
+;CHECK: mov.u32 r0, array_constant_double;
+;CHECK-NEXT: ld.const.f64 fd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
+  %x = load double addrspace(1)* %i
+  ret double %x
+}
+
+define ptx_device i16 @t4_local_u16() {
+entry:
+;CHECK: mov.u32 r0, array_local_i16;
+;CHECK-NEXT: ld.local.u16 rh0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
+  %x = load i16 addrspace(2)* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t4_local_u32() {
+entry:
+;CHECK: mov.u32 r0, array_local_i32;
+;CHECK-NEXT: ld.local.u32 r0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
   %x = load i32 addrspace(2)* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t4_shared() {
+define ptx_device i64 @t4_local_u64() {
 entry:
-;CHECK: ld.shared.s32 r0, [array_shared];
-  %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+;CHECK: mov.u32 r0, array_local_i64;
+;CHECK-NEXT: ld.local.u64 rd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
+  %x = load i64 addrspace(2)* %i
+  ret i64 %x
+}
+
+define ptx_device float @t4_local_f32() {
+entry:
+;CHECK: mov.u32 r0, array_local_float;
+;CHECK-NEXT: ld.local.f32 f0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
+  %x = load float addrspace(2)* %i
+  ret float %x
+}
+
+define ptx_device double @t4_local_f64() {
+entry:
+;CHECK: mov.u32 r0, array_local_double;
+;CHECK-NEXT: ld.local.f64 fd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
+  %x = load double addrspace(2)* %i
+  ret double %x
+}
+
+define ptx_device i16 @t4_shared_u16() {
+entry:
+;CHECK: mov.u32 r0, array_shared_i16;
+;CHECK-NEXT: ld.shared.u16 rh0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
+  %x = load i16 addrspace(4)* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t4_shared_u32() {
+entry:
+;CHECK: mov.u32 r0, array_shared_i32;
+;CHECK-NEXT: ld.shared.u32 r0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
   %x = load i32 addrspace(4)* %i
   ret i32 %x
 }
 
-define ptx_device i32 @t5() {
+define ptx_device i64 @t4_shared_u64() {
+entry:
+;CHECK: mov.u32 r0, array_shared_i64;
+;CHECK-NEXT: ld.shared.u64 rd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
+  %x = load i64 addrspace(4)* %i
+  ret i64 %x
+}
+
+define ptx_device float @t4_shared_f32() {
+entry:
+;CHECK: mov.u32 r0, array_shared_float;
+;CHECK-NEXT: ld.shared.f32 f0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
+  %x = load float addrspace(4)* %i
+  ret float %x
+}
+
+define ptx_device double @t4_shared_f64() {
+entry:
+;CHECK: mov.u32 r0, array_shared_double;
+;CHECK-NEXT: ld.shared.f64 fd0, [r0];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
+  %x = load double addrspace(4)* %i
+  ret double %x
+}
+
+define ptx_device i16 @t5_u16() {
 entry:
-;CHECK: ld.global.s32 r0, [array+4];
-  %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+;CHECK: mov.u32 r0, array_i16;
+;CHECK-NEXT: ld.global.u16 rh0, [r0+2];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
+  %x = load i16* %i
+  ret i16 %x
+}
+
+define ptx_device i32 @t5_u32() {
+entry:
+;CHECK: mov.u32 r0, array_i32;
+;CHECK-NEXT: ld.global.u32 r0, [r0+4];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
   %x = load i32* %i
   ret i32 %x
 }
+
+define ptx_device i64 @t5_u64() {
+entry:
+;CHECK: mov.u32 r0, array_i64;
+;CHECK-NEXT: ld.global.u64 rd0, [r0+8];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
+  %x = load i64* %i
+  ret i64 %x
+}
+
+define ptx_device float @t5_f32() {
+entry:
+;CHECK: mov.u32 r0, array_float;
+;CHECK-NEXT: ld.global.f32 f0, [r0+4];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
+  %x = load float* %i
+  ret float %x
+}
+
+define ptx_device double @t5_f64() {
+entry:
+;CHECK: mov.u32 r0, array_double;
+;CHECK-NEXT: ld.global.f64 fd0, [r0+8];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
+  %x = load double* %i
+  ret double %x
+}
diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll
new file mode 100644
index 0000000..1e265f5
--- /dev/null
+++ b/test/CodeGen/PTX/llvm-intrinsic.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
+
+define ptx_device float @test_sqrt_f32(float %x) {
+entry:
+; CHECK: sqrt.rn.f32 f0, f1;
+; CHECK-NEXT: ret;
+  %y = call float @llvm.sqrt.f32(float %x)
+  ret float %y
+}
+
+define ptx_device double @test_sqrt_f64(double %x) {
+entry:
+; CHECK: sqrt.rn.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+  %y = call double @llvm.sqrt.f64(double %x)
+  ret double %y
+}
+
+define ptx_device float @test_sin_f32(float %x) {
+entry:
+; CHECK: sin.approx.f32 f0, f1;
+; CHECK-NEXT: ret;
+  %y = call float @llvm.sin.f32(float %x)
+  ret float %y
+}
+
+define ptx_device double @test_sin_f64(double %x) {
+entry:
+; CHECK: sin.approx.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+  %y = call double @llvm.sin.f64(double %x)
+  ret double %y
+}
+
+define ptx_device float @test_cos_f32(float %x) {
+entry:
+; CHECK: cos.approx.f32 f0, f1;
+; CHECK-NEXT: ret;
+  %y = call float @llvm.cos.f32(float %x)
+  ret float %y
+}
+
+define ptx_device double @test_cos_f64(double %x) {
+entry:
+; CHECK: cos.approx.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+  %y = call double @llvm.cos.f64(double %x)
+  ret double %y
+}
+
+declare float  @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+declare float  @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare float  @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll
new file mode 100644
index 0000000..0c25f2c
--- /dev/null
+++ b/test/CodeGen/PTX/mad.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: mad.rn.f32 f0, f1, f2, f3;
+; CHECK-NEXT: ret;
+	%a = fmul float %x, %y
+  %b = fadd float %a, %z
+	ret float %b
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3;
+; CHECK-NEXT: ret;
+	%a = fmul double %x, %y
+  %b = fadd double %a, %z
+	ret double %b
+}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index c365e9b..120572a 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -1,13 +1,62 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-define ptx_device i32 @t1() {
-; CHECK: mov.s32 r0, 0;
+define ptx_device i16 @t1_u16() {
+; CHECK: mov.u16 rh0, 0;
+; CHECK: ret;
+	ret i16 0
+}
+
+define ptx_device i32 @t1_u32() {
+; CHECK: mov.u32 r0, 0;
 ; CHECK: ret;
 	ret i32 0
 }
 
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: mov.s32 r0, r1;
+define ptx_device i64 @t1_u64() {
+; CHECK: mov.u64 rd0, 0;
+; CHECK: ret;
+	ret i64 0
+}
+
+define ptx_device float @t1_f32() {
+; CHECK: mov.f32 f0, 0F00000000;
+; CHECK: ret;
+	ret float 0.0
+}
+
+define ptx_device double @t1_f64() {
+; CHECK: mov.f64 fd0, 0D0000000000000000;
+; CHECK: ret;
+	ret double 0.0
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: mov.u16 rh0, rh1;
+; CHECK: ret;
+	ret i16 %x
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: mov.u32 r0, r1;
 ; CHECK: ret;
 	ret i32 %x
 }
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: mov.u64 rd0, rd1;
+; CHECK: ret;
+	ret i64 %x
+}
+
+define ptx_device float @t3_f32(float %x) {
+; CHECK: mov.f32 f0, f1;
+; CHECK-NEXT: ret;
+	ret float %x
+}
+
+define ptx_device double @t3_f64(double %x) {
+; CHECK: mov.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+	ret double %x
+}
+
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
new file mode 100644
index 0000000..5ce0426
--- /dev/null
+++ b/test/CodeGen/PTX/mul.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+;define ptx_device i32 @t1(i32 %x, i32 %y) {
+;	%z = mul i32 %x, %y
+;	ret i32 %z
+;}
+
+;define ptx_device i32 @t2(i32 %x) {
+;	%z = mul i32 %x, 1
+;	ret i32 %z
+;}
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: mul.f32 f0, f1, f2
+; CHECK-NEXT: ret;
+  %z = fmul float %x, %y
+  ret float %z
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: mul.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+  %z = fmul double %x, %y
+  ret double %z
+}
+
+define ptx_device float @t2_f32(float %x) {
+; CHECK: mul.f32 f0, f1, 0F40A00000;
+; CHECK-NEXT: ret;
+  %z = fmul float %x, 5.0
+  ret float %z
+}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: mul.f64 fd0, fd1, 0D4014000000000000;
+; CHECK-NEXT: ret;
+  %z = fmul double %x, 5.0
+  ret double %z
+}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index a14d5c9..ac33fef 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -1,5 +1,9 @@
-; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
-; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
+; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
+; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
+; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
+; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
+; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
 
 define ptx_device void @t1() {
 	ret void
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
new file mode 100644
index 0000000..95d4a32
--- /dev/null
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .f32 f1, .reg .u32 r1, .reg .u32 r2, .reg .f32 f2)
+define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
+; CHECK: sub.u32 r0, r1, r2
+	%result = sub i32 %b, %c
+	ret i32 %result
+}
diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll
index d5037f2..ba0523f 100644
--- a/test/CodeGen/PTX/ret.ll
+++ b/test/CodeGen/PTX/ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
 define ptx_device void @t1() {
 ; CHECK: ret;
diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll
new file mode 100644
index 0000000..5836122
--- /dev/null
+++ b/test/CodeGen/PTX/setp.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -march=ptx32 | FileCheck %s
+
+define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.eq.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp eq i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.ne.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ne i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.lt.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ult i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.le.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ule i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.gt.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ugt i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
+; CHECK: setp.ge.u32 p0, r1, r2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp uge i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
+; CHECK: setp.eq.u32 p0, r1, 1;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp eq i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
+; CHECK: setp.ne.u32 p0, r1, 1;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ne i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
+; CHECK: setp.eq.u32 p0, r1, 0;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ult i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
+; CHECK: setp.lt.u32 p0, r1, 2;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ule i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
+; CHECK: setp.gt.u32 p0, r1, 1;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp ugt i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
+; CHECK: setp.ne.u32 p0, r1, 0;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp uge i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
+; CHECK: setp.gt.u32 p0, r3, r4;
+; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%c = icmp eq i32 %x, %y
+	%d = icmp ugt i32 %u, %v
+	%e = and i1 %c, %d
+	%z = zext i1 %e to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
+; CHECK: cvt.pred.u32 p0, r3;
+; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
+; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: ret;
+	%c = trunc i32 %w to i1
+	%d = icmp eq i32 %x, %y
+	%e = xor i1 %c, 1
+	%f = and i1 %d, %e
+	%z = zext i1 %f to i32
+	ret i32 %z
+}
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
index b564b43..6e72c92 100644
--- a/test/CodeGen/PTX/shl.ll
+++ b/test/CodeGen/PTX/shl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
 define ptx_device i32 @t1(i32 %x, i32 %y) {
 ; CHECK: shl.b32 r0, r1, r2
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
index 3f8ade8..8693e0e 100644
--- a/test/CodeGen/PTX/shr.ll
+++ b/test/CodeGen/PTX/shr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
 define ptx_device i32 @t1(i32 %x, i32 %y) {
 ; CHECK: shr.u32 r0, r1, r2
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
index 2cbacb9..dee5c61 100644
--- a/test/CodeGen/PTX/st.ll
+++ b/test/CodeGen/PTX/st.ll
@@ -1,71 +1,402 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-;CHECK: .extern .global .s32 array[];
-@array = external global [10 x i32]
+;CHECK: .extern .global .b8 array_i16[20];
+@array_i16 = external global [10 x i16]
 
-;CHECK: .extern .const .s32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x i32]
+;CHECK: .extern .const .b8 array_constant_i16[20];
+@array_constant_i16 = external addrspace(1) constant [10 x i16]
 
-;CHECK: .extern .local .s32 array_local[];
-@array_local = external addrspace(2) global [10 x i32]
+;CHECK: .extern .local .b8 array_local_i16[20];
+@array_local_i16 = external addrspace(2) global [10 x i16]
 
-;CHECK: .extern .shared .s32 array_shared[];
-@array_shared = external addrspace(4) global [10 x i32]
+;CHECK: .extern .shared .b8 array_shared_i16[20];
+@array_shared_i16 = external addrspace(4) global [10 x i16]
 
-define ptx_device void @t1(i32* %p, i32 %x) {
+;CHECK: .extern .global .b8 array_i32[40];
+@array_i32 = external global [10 x i32]
+
+;CHECK: .extern .const .b8 array_constant_i32[40];
+@array_constant_i32 = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .b8 array_local_i32[40];
+@array_local_i32 = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .b8 array_shared_i32[40];
+@array_shared_i32 = external addrspace(4) global [10 x i32]
+
+;CHECK: .extern .global .b8 array_i64[80];
+@array_i64 = external global [10 x i64]
+
+;CHECK: .extern .const .b8 array_constant_i64[80];
+@array_constant_i64 = external addrspace(1) constant [10 x i64]
+
+;CHECK: .extern .local .b8 array_local_i64[80];
+@array_local_i64 = external addrspace(2) global [10 x i64]
+
+;CHECK: .extern .shared .b8 array_shared_i64[80];
+@array_shared_i64 = external addrspace(4) global [10 x i64]
+
+;CHECK: .extern .global .b8 array_float[40];
+@array_float = external global [10 x float]
+
+;CHECK: .extern .const .b8 array_constant_float[40];
+@array_constant_float = external addrspace(1) constant [10 x float]
+
+;CHECK: .extern .local .b8 array_local_float[40];
+@array_local_float = external addrspace(2) global [10 x float]
+
+;CHECK: .extern .shared .b8 array_shared_float[40];
+@array_shared_float = external addrspace(4) global [10 x float]
+
+;CHECK: .extern .global .b8 array_double[80];
+@array_double = external global [10 x double]
+
+;CHECK: .extern .const .b8 array_constant_double[80];
+@array_constant_double = external addrspace(1) constant [10 x double]
+
+;CHECK: .extern .local .b8 array_local_double[80];
+@array_local_double = external addrspace(2) global [10 x double]
+
+;CHECK: .extern .shared .b8 array_shared_double[80];
+@array_shared_double = external addrspace(4) global [10 x double]
+
+
+define ptx_device void @t1_u16(i16* %p, i16 %x) {
 entry:
-;CHECK: st.global.s32 [r1], r2;
+;CHECK: st.global.u16 [r1], rh1;
+;CHECK-NEXT: ret;
+  store i16 %x, i16* %p
+  ret void
+}
+
+define ptx_device void @t1_u32(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.u32 [r1], r2;
+;CHECK-NEXT: ret;
   store i32 %x, i32* %p
   ret void
 }
 
-define ptx_device void @t2(i32* %p, i32 %x) {
+define ptx_device void @t1_u64(i64* %p, i64 %x) {
+entry:
+;CHECK: st.global.u64 [r1], rd1;
+;CHECK-NEXT: ret;
+  store i64 %x, i64* %p
+  ret void
+}
+
+define ptx_device void @t1_f32(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1], f1;
+;CHECK-NEXT: ret;
+  store float %x, float* %p
+  ret void
+}
+
+define ptx_device void @t1_f64(double* %p, double %x) {
+entry:
+;CHECK: st.global.f64 [r1], fd1;
+;CHECK-NEXT: ret;
+  store double %x, double* %p
+  ret void
+}
+
+define ptx_device void @t2_u16(i16* %p, i16 %x) {
 entry:
-;CHECK: st.global.s32 [r1+4], r2;
+;CHECK: st.global.u16 [r1+2], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr i16* %p, i32 1
+  store i16 %x, i16* %i
+  ret void
+}
+
+define ptx_device void @t2_u32(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.u32 [r1+4], r2;
+;CHECK-NEXT: ret;
   %i = getelementptr i32* %p, i32 1
   store i32 %x, i32* %i
   ret void
 }
 
-define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
-;CHECK: .reg .s32 r0;
+define ptx_device void @t2_u64(i64* %p, i64 %x) {
+entry:
+;CHECK: st.global.u64 [r1+8], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr i64* %p, i32 1
+  store i64 %x, i64* %i
+  ret void
+}
+
+define ptx_device void @t2_f32(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1+4], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr float* %p, i32 1
+  store float %x, float* %i
+  ret void
+}
+
+define ptx_device void @t2_f64(double* %p, double %x) {
+entry:
+;CHECK: st.global.f64 [r1+8], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr double* %p, i32 1
+  store double %x, double* %i
+  ret void
+}
+
+define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 1;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u16 [r0], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr i16* %p, i32 %q
+  store i16 %x, i16* %i
+  ret void
+}
+
+define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
 entry:
 ;CHECK: shl.b32 r0, r2, 2;
-;CHECK: add.s32 r0, r1, r0;
-;CHECK: st.global.s32 [r0], r3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u32 [r0], r3;
+;CHECK-NEXT: ret;
   %i = getelementptr i32* %p, i32 %q
   store i32 %x, i32* %i
   ret void
 }
 
-define ptx_device void @t4_global(i32 %x) {
+define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u64 [r0], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr i64* %p, i32 %q
+  store i64 %x, i64* %i
+  ret void
+}
+
+define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr float* %p, i32 %q
+  store float %x, float* %i
+  ret void
+}
+
+define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.f64 [r0], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr double* %p, i32 %q
+  store double %x, double* %i
+  ret void
+}
+
+define ptx_device void @t4_global_u16(i16 %x) {
+entry:
+;CHECK: mov.u32 r0, array_i16;
+;CHECK-NEXT: st.global.u16 [r0], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
+  store i16 %x, i16* %i
+  ret void
+}
+
+define ptx_device void @t4_global_u32(i32 %x) {
 entry:
-;CHECK: st.global.s32 [array], r1;
-  %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+;CHECK: mov.u32 r0, array_i32;
+;CHECK-NEXT: st.global.u32 [r0], r1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
   store i32 %x, i32* %i
   ret void
 }
 
-define ptx_device void @t4_local(i32 %x) {
+define ptx_device void @t4_global_u64(i64 %x) {
+entry:
+;CHECK: mov.u32 r0, array_i64;
+;CHECK-NEXT: st.global.u64 [r0], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
+  store i64 %x, i64* %i
+  ret void
+}
+
+define ptx_device void @t4_global_f32(float %x) {
+entry:
+;CHECK: mov.u32 r0, array_float;
+;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
+  store float %x, float* %i
+  ret void
+}
+
+define ptx_device void @t4_global_f64(double %x) {
+entry:
+;CHECK: mov.u32 r0, array_double;
+;CHECK-NEXT: st.global.f64 [r0], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
+  store double %x, double* %i
+  ret void
+}
+
+define ptx_device void @t4_local_u16(i16 %x) {
 entry:
-;CHECK: st.local.s32 [array_local], r1;
-  %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+;CHECK: mov.u32 r0, array_local_i16;
+;CHECK-NEXT: st.local.u16 [r0], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
+  store i16 %x, i16 addrspace(2)* %i
+  ret void
+}
+
+define ptx_device void @t4_local_u32(i32 %x) {
+entry:
+;CHECK: mov.u32 r0, array_local_i32;
+;CHECK-NEXT: st.local.u32 [r0], r1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
   store i32 %x, i32 addrspace(2)* %i
   ret void
 }
 
-define ptx_device void @t4_shared(i32 %x) {
+define ptx_device void @t4_local_u64(i64 %x) {
+entry:
+;CHECK: mov.u32 r0, array_local_i64;
+;CHECK-NEXT: st.local.u64 [r0], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
+  store i64 %x, i64 addrspace(2)* %i
+  ret void
+}
+
+define ptx_device void @t4_local_f32(float %x) {
+entry:
+;CHECK: mov.u32 r0, array_local_float;
+;CHECK-NEXT: st.local.f32 [r0], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
+  store float %x, float addrspace(2)* %i
+  ret void
+}
+
+define ptx_device void @t4_local_f64(double %x) {
+entry:
+;CHECK: mov.u32 r0, array_local_double;
+;CHECK-NEXT: st.local.f64 [r0], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
+  store double %x, double addrspace(2)* %i
+  ret void
+}
+
+define ptx_device void @t4_shared_u16(i16 %x) {
 entry:
-;CHECK: st.shared.s32 [array_shared], r1;
-  %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+;CHECK: mov.u32 r0, array_shared_i16;
+;CHECK-NEXT: st.shared.u16 [r0], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
+  store i16 %x, i16 addrspace(4)* %i
+  ret void
+}
+
+define ptx_device void @t4_shared_u32(i32 %x) {
+entry:
+;CHECK: mov.u32 r0, array_shared_i32;
+;CHECK-NEXT: st.shared.u32 [r0], r1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
   store i32 %x, i32 addrspace(4)* %i
   ret void
 }
 
-define ptx_device void @t5(i32 %x) {
+define ptx_device void @t4_shared_u64(i64 %x) {
+entry:
+;CHECK: mov.u32 r0, array_shared_i64;
+;CHECK-NEXT: st.shared.u64 [r0], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
+  store i64 %x, i64 addrspace(4)* %i
+  ret void
+}
+
+define ptx_device void @t4_shared_f32(float %x) {
+entry:
+;CHECK: mov.u32 r0, array_shared_float;
+;CHECK-NEXT: st.shared.f32 [r0], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
+  store float %x, float addrspace(4)* %i
+  ret void
+}
+
+define ptx_device void @t4_shared_f64(double %x) {
+entry:
+;CHECK: mov.u32 r0, array_shared_double;
+;CHECK-NEXT: st.shared.f64 [r0], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
+  store double %x, double addrspace(4)* %i
+  ret void
+}
+
+define ptx_device void @t5_u16(i16 %x) {
 entry:
-;CHECK: st.global.s32 [array+4], r1;
-  %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+;CHECK: mov.u32 r0, array_i16;
+;CHECK-NEXT: st.global.u16 [r0+2], rh1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
+  store i16 %x, i16* %i
+  ret void
+}
+
+define ptx_device void @t5_u32(i32 %x) {
+entry:
+;CHECK: mov.u32 r0, array_i32;
+;CHECK-NEXT: st.global.u32 [r0+4], r1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
   store i32 %x, i32* %i
   ret void
 }
+
+define ptx_device void @t5_u64(i64 %x) {
+entry:
+;CHECK: mov.u32 r0, array_i64;
+;CHECK-NEXT: st.global.u64 [r0+8], rd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
+  store i64 %x, i64* %i
+  ret void
+}
+
+define ptx_device void @t5_f32(float %x) {
+entry:
+;CHECK: mov.u32 r0, array_float;
+;CHECK-NEXT: st.global.f32 [r0+4], f1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
+  store float %x, float* %i
+  ret void
+}
+
+define ptx_device void @t5_f64(double %x) {
+entry:
+;CHECK: mov.u32 r0, array_double;
+;CHECK-NEXT: st.global.f64 [r0+8], fd1;
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
+  store double %x, double* %i
+  ret void
+}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
index aab3fda..7dd2c6f 100644
--- a/test/CodeGen/PTX/sub.ll
+++ b/test/CodeGen/PTX/sub.ll
@@ -1,15 +1,71 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
+; RUN: llc < %s -march=ptx32 | FileCheck %s
 
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-;CHECK: sub.s32 r0, r1, r2;
+define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
+; CHECK: sub.u16 rh0, rh1, rh2;
+; CHECK-NEXT: ret;
+	%z = sub i16 %x, %y
+	ret i16 %z
+}
+
+define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
+; CHECK: sub.u32 r0, r1, r2;
+; CHECK-NEXT: ret;
 	%z = sub i32 %x, %y
-;CHECK: ret;
 	ret i32 %z
 }
 
-define ptx_device i32 @t2(i32 %x) {
-;CHECK: add.s32 r0, r1, -1;
+define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
+; CHECK: sub.u64 rd0, rd1, rd2;
+; CHECK-NEXT: ret;
+	%z = sub i64 %x, %y
+	ret i64 %z
+}
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: sub.f32 f0, f1, f2
+; CHECK-NEXT: ret;
+  %z = fsub float %x, %y
+  ret float %z
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: sub.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+  %z = fsub double %x, %y
+  ret double %z
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: add.u16 rh0, rh1, -1;
+; CHECK-NEXT: ret;
+	%z = sub i16 %x, 1
+	ret i16 %z
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: add.u32 r0, r1, -1;
+; CHECK-NEXT: ret;
 	%z = sub i32 %x, 1
-;CHECK: ret;
 	ret i32 %z
 }
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: add.u64 rd0, rd1, -1;
+; CHECK-NEXT: ret;
+	%z = sub i64 %x, 1
+	ret i64 %z
+}
+
+define ptx_device float @t2_f32(float %x) {
+; CHECK: add.f32 f0, f1, 0FBF800000;
+; CHECK-NEXT: ret;
+  %z = fsub float %x, 1.0
+  ret float %z
+}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000;
+; CHECK-NEXT: ret;
+  %z = fsub double %x, 1.0
+  ret double %z
+}
diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll
index 2315e36..a2a5e9e 100644
--- a/test/CodeGen/PowerPC/2008-12-12-EH.ll
+++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s  -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -disable-cfi -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh
 
 define void @_Z1fv() {
 entry:
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index b10920a..72ae9d6 100644
--- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 -regalloc=basic | FileCheck %s
 
 declare i8* @llvm.frameaddress(i32) nounwind readnone
 
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index 1dc4310..cfc1eb9 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -1,5 +1,11 @@
-; RUN: llc < %s -march=ppc64
-; ModuleID = 'Atomics.c'
+; RUN: llc < %s -march=ppc64 -verify-machineinstrs
+;
+; This test is disabled until PPCISelLowering learns to insert proper 64-bit
+; code for ATOMIC_CMP_SWAP. Currently, it is inserting 32-bit instructions with
+; 64-bit operands which causes the machine code verifier to throw a tantrum.
+;
+; XFAIL: *
+
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin9"
 @sc = common global i8 0		; <i8*> [#uses=52]
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 404fdd0..ecd5ecd 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
-; RUN  not grep {stw r31, -4(r1)} %t1
+; RUN: not grep {stw r31, -4(r1)} %t1
 ; RUN: grep {stwu r1, -16448(r1)} %t1
 ; RUN: grep {addi r1, r1, 16448} %t1
 ; RUN: llc < %s -march=ppc32 | \
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 5122ab3..ac56625 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,13 +43,13 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; PIC: addis r4, r4, ha16(Ltmp0-L0$pb)
-; PIC: li r6, lo16(Ltmp0-L0$pb)
-; PIC: add r4, r4, r6
-; PIC: stw r4
-; STATIC: li r5, lo16(Ltmp0)
-; STATIC: addis r5, r5, ha16(Ltmp0)
-; STATIC: stw r5
+; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
+; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb)
+; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]]
+; PIC: stw r[[R2]]
+; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0)
+; STATIC: addis r[[R0]], r[[R0]], ha16(Ltmp0)
+; STATIC: stw r[[R0]]
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
 }
diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll
index 9ab8d99..5b02e18 100644
--- a/test/CodeGen/PowerPC/mulhs.ll
+++ b/test/CodeGen/PowerPC/mulhs.ll
@@ -5,7 +5,7 @@
 ; RUN: not grep add %t 
 ; RUN: grep mulhw %t | count 1
 
-define i32 @mulhs(i32 %a, i32 %b) {
+define i32 @mulhs(i32 %a, i32 %b) nounwind {
 entry:
         %tmp.1 = sext i32 %a to i64             ; <i64> [#uses=1]
         %tmp.3 = sext i32 %b to i64             ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll
index 2ebfd3c..5538371 100644
--- a/test/CodeGen/PowerPC/ppc-prologue.ll
+++ b/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -5,9 +5,7 @@ define i32 @_Z4funci(i32 %a) ssp {
 ; CHECK-NEXT:  stw r31, -4(r1)
 ; CHECK-NEXT:  stw r0, 8(r1)
 ; CHECK-NEXT:  stwu r1, -80(r1)
-; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT:  mr r31, r1
-; CHECK-NEXT: Ltmp1:
+; CHECK:  mr r31, r1
 entry:
   %a_addr = alloca i32                            ; <i32*> [#uses=2]
   %retval = alloca i32                            ; <i32*> [#uses=2]
diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
index fbf7242..9e6583c 100644
--- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
+++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -1,5 +1,7 @@
 ;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8
 ;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
+;RUN: llc -march=sparc -regalloc=basic < %s | FileCheck %s -check-prefix=V8
+;RUN: llc -march=sparc -regalloc=basic -mattr=v9 < %s | FileCheck %s -check-prefix=V9
 
 define i8* @frameaddr() nounwind readnone {
 entry:
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
index 98feb83..92f5467 100644
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=basic | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
 target triple = "s390x-ibm-linux"
@@ -8,7 +9,7 @@ declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) n
 declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
 
 define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
-; CHECK: lg %r11, 328(%r15)
+; CHECK: lg %r{{[0-9]+}}, 328(%r15)
 
 newFuncRoot:
 	br label %bb3
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index 39612c0..d6ca0d7 100644
--- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 -verify-machineinstrs | FileCheck %s
 ; rdar://7157006
 
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
deleted file mode 100644
index fad2669..0000000
--- a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s
-
-target triple = "thumbv6-apple-darwin10"
-
-@fred = internal global i32 0              ; <i32*> [#uses=1]
-
-define void @foo() nounwind {
-entry:
-; CHECK: str r0, [sp
-  %0 = call  i32 (...)* @bar() nounwind ; <i32> [#uses=1]
-; CHECK: blx _bar
-; CHECK: ldr r1, [sp
-  store i32 %0, i32* @fred, align 4
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-declare i32 @bar(...)
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 06c0dfe..9f5a677 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -10,7 +10,7 @@
 define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
 ; CHECK: blx ___muldf3
 ; CHECK: blx ___muldf3
-; CHECK: beq LBB0_7
+; CHECK: beq LBB0
 ; CHECK: blx ___muldf3
 ; <label>:3
   switch i32 %1, label %4 [
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 1f31dca..cd35be6 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@@ -7,9 +8,10 @@ define void @t1(%struct.state* %v) {
 ; CHECK: t1:
 ; CHECK: push
 ; CHECK: add r7, sp, #12
-; CHECK: mov r2, sp
-; CHECK: subs r4, r2, r1
-; CHECK: mov sp, r4
+; CHECK: lsls r[[R0:[0-9]+]]
+; CHECK: mov r[[R1:[0-9]+]], sp
+; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r[[R0]]
+; CHECK: mov sp, r[[R2]]
 	%tmp6 = load i32* null
 	%tmp8 = alloca float, i32 %tmp6
 	store i32 1, i32* null
@@ -40,15 +42,16 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
 ; CHECK: t2:
 ; CHECK: push
 ; CHECK: add r7, sp, #12
-; CHECK: sub sp, #8
-; CHECK: mov r6, sp
-; CHECK: str r2, [r6, #4]
-; CHECK: str r0, [r6]
+; CHECK: sub sp, #
+; CHECK: mov r[[R0:[0-9]+]], sp
+; CHECK: str r{{[0-9+]}}, [r[[R0]]
+; CHECK: str r{{[0-9+]}}, [r[[R0]]
 ; CHECK-NOT: ldr r0, [sp
-; CHECK: ldr r0, [r6, #4]
-; CHECK: mov r0, sp
-; CHECK: subs r5, r0, r1
-; CHECK: mov sp, r5
+; CHECK: mov r[[R1:[0-9]+]], sp
+; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
+; CHECK: mov sp, r[[R2]]
+; CHECK-NOT: ldr r0, [sp
+; CHECK: bx
 	%tmp1 = call i32 @strlen( i8* %tag )
 	%tmp3 = call i32 @strlen( i8* %contents )
 	%tmp4 = add i32 %tmp1, 2
diff --git a/test/CodeGen/Thumb/rev.ll b/test/CodeGen/Thumb/rev.ll
new file mode 100644
index 0000000..5e163f8
--- /dev/null
+++ b/test/CodeGen/Thumb/rev.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s
+
+define i32 @test1(i32 %X) nounwind {
+; CHECK: test1
+; CHECK: rev16 r0, r0
+        %tmp1 = lshr i32 %X, 8
+        %X15 = bitcast i32 %X to i32
+        %tmp4 = shl i32 %X15, 8
+        %tmp2 = and i32 %tmp1, 16711680
+        %tmp5 = and i32 %tmp4, -16777216
+        %tmp9 = and i32 %tmp1, 255
+        %tmp13 = and i32 %tmp4, 65280
+        %tmp6 = or i32 %tmp5, %tmp2
+        %tmp10 = or i32 %tmp6, %tmp13
+        %tmp14 = or i32 %tmp10, %tmp9
+        ret i32 %tmp14
+}
+
+define i32 @test2(i32 %X) nounwind {
+; CHECK: test2
+; CHECK: revsh r0, r0
+        %tmp1 = lshr i32 %X, 8
+        %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+        %tmp3 = trunc i32 %X to i16
+        %tmp2 = and i16 %tmp1.upgrd.1, 255
+        %tmp4 = shl i16 %tmp3, 8
+        %tmp5 = or i16 %tmp2, %tmp4
+        %tmp5.upgrd.2 = sext i16 %tmp5 to i32
+        ret i32 %tmp5.upgrd.2
+}
+
+; rdar://9147637
+define i32 @test3(i16 zeroext %a) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: revsh r0, r0
+  %0 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %1 = sext i16 %0 to i32
+  ret i32 %1
+}
+
+declare i16 @llvm.bswap.i16(i16) nounwind readnone
+
+define i32 @test4(i16 zeroext %a) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: revsh r0, r0
+  %conv = zext i16 %a to i32
+  %shr9 = lshr i16 %a, 8
+  %conv2 = zext i16 %shr9 to i32
+  %shl = shl nuw nsw i32 %conv, 8
+  %or = or i32 %conv2, %shl
+  %sext = shl i32 %or, 16
+  %conv8 = ashr exact i32 %sext, 16
+  ret i32 %conv8
+}
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index 550b3ef..ff68e66 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -8,7 +8,7 @@ entry:
 ; CHECK: sub sp, #8
 ; CHECK: push
 ; CHECK: add r7, sp, #4
-; CHECK: subs r4, r7, #4
+; CHECK: sub.w r4, r7, #4
 ; CHECK: mov sp, r4
 ; CHECK-NOT: mov sp, r7
 ; CHECK: add sp, #8
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index b2ed8fc..ac3e80a 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts | FileCheck %s
 
 %struct.pix_pos = type { i32, i32, i32, i32, i32, i32 }
 
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 45d356c..3594424 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard -regalloc=basic | FileCheck %s
 ; PR5204
 
 %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
@@ -11,8 +12,8 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
-; CHECK-NEXT: subeq r0, r6, r7
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
+; CHECK-NEXT: ldmia.w sp!,
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index 458569e..bb734ac 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -1,10 +1,11 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
-; RUN:   -pre-RA-sched=source | FileCheck -check-prefix=SOURCE %s
+; RUN:   -pre-RA-sched=source | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
-; RUN:   -pre-RA-sched=list-hybrid | FileCheck -check-prefix=HYBRID %s
+; RUN:   -pre-RA-sched=list-hybrid | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
 ; Radar 7459078
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-
+	
 %0 = type { i32, i32 }
 %s1 = type { %s3, i32, %s4, i8*, void (i8*, i8*)*, i8*, i32*, i32*, i32*, i32, i64, [1 x i32] }
 %s2 = type { i32 (...)**, %s4 }
@@ -13,11 +14,10 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 %s5 = type { i32 }
 
 ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
-; SOURCE: InlineAsm End
-; SOURCE: cmp
-; SOURCE: beq
-; HYBRID: InlineAsm End
-; HYBRID: cbz
+; CHECK: blx _f2
+; CHECK: cmp r0, #0
+; CHECK-NOT: cmp
+; CHECK: InlineAsm Start
 define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
 entry:
   %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index 313728c..d2140a1 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -8,7 +8,7 @@ declare void @bar() nounwind optsize
 define void @foo() nounwind optsize {
 ; CHECK: foo:
 ; CHECK: push
-; CHECK: add r7, sp, #4
+; CHECK: mov r7, sp
 ; CHECK: sub sp, #4
 entry:
   %m.i = alloca %struct.buf*, align 4
diff --git a/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll b/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
new file mode 100644
index 0000000..604a352
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; Use sp, #imm to lower frame indices when the offset is multiple of 4
+; and in the range of 0-1020. This saves code size by utilizing
+; 16-bit instructions.
+; rdar://9321541
+
+define i32 @t() nounwind {
+entry:
+; CHECK: t:
+; CHECK: sub sp, #12
+; CHECK-NOT: sub
+; CHECK: add r0, sp, #4
+; CHECK: add r1, sp, #8
+; CHECK: mov r2, sp
+  %size = alloca i32, align 4
+  %count = alloca i32, align 4
+  %index = alloca i32, align 4
+  %0 = call i32 @foo(i32* %count, i32* %size, i32* %index) nounwind
+  ret i32 %0
+}
+
+declare i32 @foo(i32*, i32*, i32*)
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 6fb2fc8..0e76770 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -49,3 +49,14 @@ define i32 @f4(i32 %a) nounwind {
   %ins12 = or i32 %ins7, 3137
   ret i32 %ins12
 }
+
+; rdar://9177502
+define i32 @f5(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: f5
+; CHECK-NOT: bfi r0, r2, #0, #1
+%and = and i32 %a, 2
+%b.masked = and i32 %b, -2
+%and3 = or i32 %b.masked, %and
+ret i32 %and3
+}
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index b8c8cb1..edbf834 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -6,8 +6,6 @@ entry:
   br label %bb5
 
 bb5:                                              ; preds = %bb5, %entry
-; CHECK: %bb5
-; CHECK: bne
   br i1 undef, label %bb5, label %bb.nph
 
 bb.nph:                                           ; preds = %bb5
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 650d788..e1932bd 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -regalloc=linearscan | FileCheck %s
 ; rdar://7352504
 ; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
 
@@ -22,7 +22,7 @@
 
 define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
 entry:
-; CHECK:       ldr.w	{{(r[0-9])|(lr)}}, [r7, #28]
+; CHECK:       ldr.w	{{(r[0-9]+)|(lr)}}, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   br label %bb20
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 5e776dd..ee054a1 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -14,19 +14,19 @@ entry:
 
 bb.nph:                                           ; preds = %entry
 ; CHECK: BB#1
-; CHECK: movw r2, :lower16:L_GV$non_lazy_ptr
-; CHECK: movt r2, :upper16:L_GV$non_lazy_ptr
-; CHECK: ldr r2, [r2]
-; CHECK: ldr r3, [r2]
+; CHECK: movw r[[R2:[0-9]+]], :lower16:L_GV$non_lazy_ptr
+; CHECK: movt r[[R2]], :upper16:L_GV$non_lazy_ptr
+; CHECK: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]]
+; CHECK: ldr{{.*}}, [r[[R2b]]
 ; CHECK: LBB0_2
 ; CHECK-NOT: LCPI0_0:
 
 ; PIC: BB#1
-; PIC: movw r2, :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4))
-; PIC: movt r2, :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4))
-; PIC: add r2, pc
-; PIC: ldr r2, [r2]
-; PIC: ldr r3, [r2]
+; PIC: movw r[[R2:[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4))
+; PIC: movt r[[R2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4))
+; PIC: add r[[R2]], pc
+; PIC: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]]
+; PIC: ldr{{.*}}, [r[[R2b]]
 ; PIC: LBB0_2
 ; PIC-NOT: LCPI0_0:
 ; PIC: .section
@@ -88,10 +88,10 @@ define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone {
 bb.nph:
 ; CHECK: bb.nph
 ; CHECK: movw {{(r[0-9])|(lr)}}, #32768
-; CHECK: movs {{(r[0-9])|(lr)}}, #8
-; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386
-; CHECK: movw {{(r[0-9])|(lr)}}, #65534
-; CHECK: movt {{(r[0-9])|(lr)}}, #65535
+; CHECK: movs {{(r[0-9]+)|(lr)}}, #0
+; CHECK: movw [[REGISTER:(r[0-9]+)|(lr)]], #16386
+; CHECK: movw {{(r[0-9]+)|(lr)}}, #65534
+; CHECK: movt {{(r[0-9]+)|(lr)}}, #65535
   br label %bb
 
 bb:                                               ; preds = %bb, %bb.nph
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index a747d5f..d3b781d 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -regalloc=linearscan | FileCheck %s
 
 @b = external global i64*
 
diff --git a/test/CodeGen/Thumb2/thumb2-lsr3.ll b/test/CodeGen/Thumb2/thumb2-lsr3.ll
index 5cfd3f5..e7ba782 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr3.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i1 @test1(i64 %poscnt, i32 %work) {
 entry:
-; CHECK: rrx r0, r0
 ; CHECK: lsrs.w r1, r1, #1
+; CHECK: rrx r0, r0
 	%0 = lshr i64 %poscnt, 1
 	%1 = icmp eq i64 %0, 0
 	ret i1 %1
@@ -11,8 +11,8 @@ entry:
 
 define i1 @test2(i64 %poscnt, i32 %work) {
 entry:
-; CHECK: rrx r0, r0
 ; CHECK: asrs.w r1, r1, #1
+; CHECK: rrx r0, r0
 	%0 = ashr i64 %poscnt, 1
 	%1 = icmp eq i64 %0, 0
 	ret i1 %1
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 0200116..590c333 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,11 +1,24 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 
+; CHECK: f1:
+; CHECK: 	ror.w	r0, r0, #22
 define i32 @f1(i32 %a) {
     %l8 = shl i32 %a, 10
     %r8 = lshr i32 %a, 22
     %tmp = or i32 %l8, %r8
     ret i32 %tmp
 }
-; CHECK: f1:
-; CHECK: 	ror.w	r0, r0, #22
+
+; CHECK: f2:
+; CHECK-NOT: and
+; CHECK: ror
+define i32 @f2(i32 %v, i32 %nbits) {
+entry:
+  %and = and i32 %nbits, 31
+  %shr = lshr i32 %v, %and
+  %sub = sub i32 32, %and
+  %shl = shl i32 %v, %sub
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Thumb2/thumb2-ror2.ll b/test/CodeGen/Thumb2/thumb2-ror2.ll
deleted file mode 100644
index ffd1dd7..0000000
--- a/test/CodeGen/Thumb2/thumb2-ror2.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-
-define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
-; CHECK: rors r0, r1
-    %db = sub i32 32, %b
-    %l8 = shl i32 %a, %b
-    %r8 = lshr i32 %a, %db
-    %tmp = or i32 %l8, %r8
-    ret i32 %tmp
-}
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
index de6502d..53f45ea 100644
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -39,7 +39,7 @@ define i64 @f5(i64 %a) {
 ; CHECK: f5
 ; CHECK: subs  r0, #2
 ; CHECK: adc r1, r1, #-1448498775
-    %tmp = sub i64 %a, 6221254862626095106 
+    %tmp = sub i64 %a, 6221254862626095106
     ret i64 %tmp
 }
 
@@ -48,7 +48,22 @@ define i64 @f6(i64 %a) {
 ; CHECK: f6
 ; CHECK: subs  r0, #2
 ; CHECK: sbc r1, r1, #66846720
-    %tmp = sub i64 %a, 287104476244869122 
+    %tmp = sub i64 %a, 287104476244869122
     ret i64 %tmp
 }
 
+; Example from numerics code that manually computes wider-than-64 values.
+;
+; CHECK: livecarry:
+; CHECK: adds
+; CHECK: adcs
+; CHECK: adc
+define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
+  %ch = lshr i64 %carry, 32
+  %cl = and i64 %carry, 4294967295
+  %truncdigit = zext i32 %digit to i64
+  %prod = add i64 %cl, %truncdigit
+  %ph = lshr i64 %prod, 32
+  %carryresult = add i64 %ch, %ph
+  ret i64 %carryresult
+}
diff --git a/test/CodeGen/Thumb2/thumb2-sub3.ll b/test/CodeGen/Thumb2/thumb2-sub3.ll
index 855ad06..1dbda57 100644
--- a/test/CodeGen/Thumb2/thumb2-sub3.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub3.ll
@@ -4,7 +4,7 @@
 define i64 @f1(i64 %a) {
 ; CHECK: f1
 ; CHECK: subs  r0, #171
-; CHECK: adc r1, r1, #-1
+; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 171
     ret i64 %tmp
 }
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 define i64 @f2(i64 %a) {
 ; CHECK: f2
 ; CHECK: subs.w  r0, r0, #1179666
-; CHECK: adc r1, r1, #-1
+; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 1179666
     ret i64 %tmp
 }
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 define i64 @f3(i64 %a) {
 ; CHECK: f3
 ; CHECK: subs.w  r0, r0, #872428544
-; CHECK: adc r1, r1, #-1
+; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 872428544
     ret i64 %tmp
 }
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 define i64 @f4(i64 %a) {
 ; CHECK: f4
 ; CHECK: subs.w  r0, r0, #1448498774
-; CHECK: adc r1, r1, #-1
+; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 1448498774
     ret i64 %tmp
 }
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a) {
 define i64 @f5(i64 %a) {
 ; CHECK: f5
 ; CHECK: subs.w  r0, r0, #66846720
-; CHECK: adc r1, r1, #-1
+; CHECK: sbc r1, r1, #0
     %tmp = sub i64 %a, 66846720
     ret i64 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index c3b56bc..6edd789 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK: f1:
-; CHECK: subs r0, r0, r2
-; CHECK: sbcs r1, r3
+; CHECK: subs.w r0, r0, r2
+; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs
+; CHECK: sbc.w  r1, r1, r3
     %tmp = sub i64 %a, %b
     ret i64 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 2074f98..35914b1 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -128,9 +128,9 @@ define i32 @test10(i32 %p0) {
 
 ; ARMv7M: test10
 ; ARMv7M: mov.w r1, #16253176
+; ARMv7M: mov.w r2, #458759
 ; ARMv7M: and.w r0, r1, r0, lsr #7
-; ARMv7M: mov.w r1, #458759
-; ARMv7M: and.w r1, r1, r0, lsr #5
+; ARMv7M: and.w r1, r2, r0, lsr #5
 ; ARMv7M: orrs r0, r1
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16253176		; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
index de226a1..3458550 100644
--- a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
+++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
@@ -1,5 +1,8 @@
-; RUN: llc %s -o - -march=x86-64 | grep {(%rdi,%rax,8)}
-; RUN: llc %s -o - -march=x86-64 | not grep {addq.*8}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: {{addq.*8}}
+; CHECK:     ({{%rdi|%rcx}},%rax,8)
+; CHECK-NOT: {{addq.*8}}
 
 define void @foo(double* %y) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index a662dd5..11c0bf9 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -8,8 +8,8 @@ entry:
 
 bb26:		; preds = %bb26, %entry
 
-; CHECK:  addl  %e
-; CHECK:  adcl  %e
+; CHECK:  addl
+; CHECK:  adcl
 
 	%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]		; <i32> [#uses=3]
 	%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]		; <<1 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index a9b17d3..0f49d2e 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | grep zPL
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s
+
+; CHECK: .cfi_personality 0, __gnat_eh_personality
+; CHECK: .cfi_lsda 0, .Lexception0
 
 @error = external global i8		; <i8*> [#uses=2]
 
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 1c5e676..187c3e4 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rsi, %mm0}
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rdi, %mm1}
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw	%mm0, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s
+
+; CHECK: movd %rsi, [[MM0:%mm[0-9]+]]
+; CHECK: movd %rdi, [[MM1:%mm[0-9]+]]
+; CHECK: paddusw [[MM0]], [[MM1]]
 
 @R = external global x86_mmx		; <x86_mmx*> [#uses=1]
 
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index 8625b27..6288c4a 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	(%rdi), %rax}
-; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	8(%rdi), %rax}
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck %s
+; CHECK: movq ([[A0:%rdi|%rcx]]), %rax
+; CHECK: movq 8([[A0]]), %rax
 define i64 @foo_0(<2 x i64>* %val) {
 entry:
         %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0            ; <i64*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index c3403a0..1e43272 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep {isNullOrNil].eh"} | count 2
+; RUN: llc < %s -disable-cfi -march=x86 -mtriple=i686-apple-darwin | grep {isNullOrNil].eh"} | FileCheck %s
+
+; CHECK: "_-[NSString(local) isNullOrNil].eh":
 
 	%struct.NSString = type {  }
 	%struct._objc__method_prototype_list = type opaque
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index 4d69715..b4a986f 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -22,8 +22,8 @@ entry:
 ; CHECK: bar:
 ; CHECK: fldt 4(%esp)
 ; CHECK-NEXT: fld	%st(0)
-; CHECK-NEXT: fmul	%st(1)
-; CHECK-NEXT: fmulp	%st(1)
+; CHECK-NEXT: fmul	%st(1), %st(0)
+; CHECK-NEXT: fmulp
 ; CHECK-NEXT: ret
 }
 
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
index a91ac27..8f4d353 100644
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of re-materialization} | grep 2
 ; rdar://5761454
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
index 3ae5026..33d658c 100644
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan | grep movss | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan -stats |& grep {Number of re-materialization} | grep 1
 
 	%struct..0objc_object = type opaque
 	%struct.OhBoy = type {  }
diff --git a/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
index 27bbbaa..ab8ec801 100644
--- a/test/CodeGen/X86/2008-04-02-unnamedEH.ll
+++ b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cfi | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 6e8891b..bfe8ef5 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep {, %e}
 
 	%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
 	%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
index d0023b2..dee7415 100644
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -4,7 +4,6 @@
 ; CHECK: andl    $65534, %
 ; CHECK-NEXT: movl %
 ; CHECK-NEXT: movzwl
-; CHECK-NEXT: movl $17
 
 @g_5 = external global i16		; <i16*> [#uses=2]
 @g_107 = external global i16		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index cf04dcf..368af6d 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 75
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movups | count 33
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movups | count 33
 ; PR2539
 ; PR8969 - make 32-bit linux have a 16-byte aligned stack
+; Verify that movups is still generated with an aligned stack for the globals
+; that must be accessed unaligned
 
 external global <4 x float>, align 1		; <<4 x float>*>:0 [#uses=2]
 external global <4 x float>, align 1		; <<4 x float>*>:1 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
deleted file mode 100644
index d9d95b5..0000000
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -disable-fp-elim -stats |& grep asm-printer | grep 55
-; PR2568
-
-@g_3 = external global i16		; <i16*> [#uses=1]
-@g_5 = external global i32		; <i32*> [#uses=3]
-
-declare i32 @func_15(i16 signext , i16 signext , i32) nounwind 
-
-define void @func_9_entry_2E_ce(i8 %p_11) nounwind {
-newFuncRoot:
-	br label %entry.ce
-
-entry.ce.ret.exitStub:		; preds = %entry.ce
-	ret void
-
-entry.ce:		; preds = %newFuncRoot
-	load i16* @g_3, align 2		; <i16>:0 [#uses=1]
-	icmp sgt i16 %0, 0		; <i1>:1 [#uses=1]
-	zext i1 %1 to i32		; <i32>:2 [#uses=1]
-	load i32* @g_5, align 4		; <i32>:3 [#uses=4]
-	icmp ugt i32 %2, %3		; <i1>:4 [#uses=1]
-	zext i1 %4 to i32		; <i32>:5 [#uses=1]
-	icmp eq i32 %3, 0		; <i1>:6 [#uses=1]
-	%.0 = select i1 %6, i32 1, i32 %3		; <i32> [#uses=1]
-	urem i32 1, %.0		; <i32>:7 [#uses=2]
-	sext i8 %p_11 to i16		; <i16>:8 [#uses=1]
-	trunc i32 %3 to i16		; <i16>:9 [#uses=1]
-	tail call i32 @func_15( i16 signext  %8, i16 signext  %9, i32 1 ) nounwind 		; <i32>:10 [#uses=0]
-	load i32* @g_5, align 4		; <i32>:11 [#uses=1]
-	trunc i32 %11 to i16		; <i16>:12 [#uses=1]
-	tail call i32 @func_15( i16 signext  %12, i16 signext  1, i32 %7 ) nounwind 		; <i32>:13 [#uses=0]
-	sext i8 %p_11 to i32		; <i32>:14 [#uses=1]
-	%p_11.lobit = lshr i8 %p_11, 7		; <i8> [#uses=1]
-	%tmp = zext i8 %p_11.lobit to i32		; <i32> [#uses=1]
-	%tmp.not = xor i32 %tmp, 1		; <i32> [#uses=1]
-	%.015 = ashr i32 %14, %tmp.not		; <i32> [#uses=2]
-	icmp eq i32 %.015, 0		; <i1>:15 [#uses=1]
-	%.016 = select i1 %15, i32 1, i32 %.015		; <i32> [#uses=1]
-	udiv i32 %7, %.016		; <i32>:16 [#uses=1]
-	icmp ult i32 %5, %16		; <i1>:17 [#uses=1]
-	zext i1 %17 to i32		; <i32>:18 [#uses=1]
-	store i32 %18, i32* @g_5, align 4
-	br label %entry.ce.ret.exitStub
-}
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index eadfda0..947a1f1 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
-; RUN: llc < %s -march=x86 -regalloc=fast  | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
+; RUN: llc < %s -march=x86 -regalloc=linearscan | grep "#%ebp %edi %ebx 8(%esi) %eax %dl"
+; RUN: llc < %s -march=x86 -regalloc=fast       | grep "#%ebx %esi %edi 8(%ebp) %eax %dl"
+; RUN: llc < %s -march=x86 -regalloc=basic      | grep "#%ebp %esi %edx 8(%edi) %eax %bl"
+; RUN: llc < %s -march=x86 -regalloc=greedy     | grep "#%edx %edi %ebp 8(%esi) %eax %bl"
 
 ; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
diff --git a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
index e97b63d..2e27811 100644
--- a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
+++ b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | grep ^.L_Z1fv.eh
-; RUN: llc < %s -march=x86    -mtriple=i686-unknown-linux-gnu | grep ^.L_Z1fv.eh
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
-; RUN: llc < %s -march=x86    -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -disable-cfi -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -disable-cfi -march=x86    -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
 
 define void @_Z1fv() {
 entry:
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
index 58a7f9f..aba4bfc 100644
--- a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
+++ b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split -regalloc=linearscan
 
 define i32 @main() nounwind {
 bb4.i.thread:
diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
deleted file mode 100644
index d5ba93e..0000000
--- a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
-
-@lookupTable5B = external global [64 x i32], align 32		; <[64 x i32]*> [#uses=1]
-@lookupTable3B = external global [16 x i32], align 32		; <[16 x i32]*> [#uses=1]
-@disparity0 = external global i32		; <i32*> [#uses=5]
-@disparity1 = external global i32		; <i32*> [#uses=3]
-
-define i32 @calc(i32 %theWord, i32 %k) nounwind {
-entry:
-	%0 = lshr i32 %theWord, 3		; <i32> [#uses=1]
-	%1 = and i32 %0, 31		; <i32> [#uses=1]
-	%2 = shl i32 %k, 5		; <i32> [#uses=1]
-	%3 = or i32 %1, %2		; <i32> [#uses=1]
-	%4 = and i32 %theWord, 7		; <i32> [#uses=1]
-	%5 = shl i32 %k, 3		; <i32> [#uses=1]
-	%6 = or i32 %5, %4		; <i32> [#uses=1]
-	%7 = getelementptr [64 x i32]* @lookupTable5B, i32 0, i32 %3		; <i32*> [#uses=1]
-	%8 = load i32* %7, align 4		; <i32> [#uses=5]
-	%9 = getelementptr [16 x i32]* @lookupTable3B, i32 0, i32 %6		; <i32*> [#uses=1]
-	%10 = load i32* %9, align 4		; <i32> [#uses=5]
-	%11 = and i32 %8, 65536		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
-	br i1 %12, label %bb1, label %bb
-
-bb:		; preds = %entry
-	%13 = and i32 %8, 994		; <i32> [#uses=1]
-	%14 = load i32* @disparity0, align 4		; <i32> [#uses=2]
-	store i32 %14, i32* @disparity1, align 4
-	br label %bb8
-
-bb1:		; preds = %entry
-	%15 = lshr i32 %8, 18		; <i32> [#uses=1]
-	%16 = and i32 %15, 1		; <i32> [#uses=1]
-	%17 = load i32* @disparity0, align 4		; <i32> [#uses=4]
-	%18 = icmp eq i32 %16, %17		; <i1> [#uses=1]
-	%not = select i1 %18, i32 0, i32 994		; <i32> [#uses=1]
-	%.masked = and i32 %8, 994		; <i32> [#uses=1]
-	%result.1 = xor i32 %not, %.masked		; <i32> [#uses=2]
-	%19 = and i32 %8, 524288		; <i32> [#uses=1]
-	%20 = icmp eq i32 %19, 0		; <i1> [#uses=1]
-	br i1 %20, label %bb7, label %bb6
-
-bb6:		; preds = %bb1
-	%21 = xor i32 %17, 1		; <i32> [#uses=2]
-	store i32 %21, i32* @disparity1, align 4
-	br label %bb8
-
-bb7:		; preds = %bb1
-	store i32 %17, i32* @disparity1, align 4
-	br label %bb8
-
-bb8:		; preds = %bb7, %bb6, %bb
-	%22 = phi i32 [ %17, %bb7 ], [ %21, %bb6 ], [ %14, %bb ]		; <i32> [#uses=4]
-	%result.0 = phi i32 [ %result.1, %bb7 ], [ %result.1, %bb6 ], [ %13, %bb ]		; <i32> [#uses=2]
-	%23 = and i32 %10, 65536		; <i32> [#uses=1]
-	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
-	br i1 %24, label %bb10, label %bb9
-
-bb9:		; preds = %bb8
-	%25 = and i32 %10, 29		; <i32> [#uses=1]
-	%26 = or i32 %result.0, %25		; <i32> [#uses=1]
-	store i32 %22, i32* @disparity0, align 4
-	ret i32 %26
-
-bb10:		; preds = %bb8
-	%27 = lshr i32 %10, 18		; <i32> [#uses=1]
-	%28 = and i32 %27, 1		; <i32> [#uses=1]
-	%29 = icmp eq i32 %28, %22		; <i1> [#uses=1]
-	%not13 = select i1 %29, i32 0, i32 29		; <i32> [#uses=1]
-	%.masked20 = and i32 %10, 29		; <i32> [#uses=1]
-	%.pn = xor i32 %not13, %.masked20		; <i32> [#uses=1]
-	%result.3 = or i32 %.pn, %result.0		; <i32> [#uses=2]
-	%30 = and i32 %10, 524288		; <i32> [#uses=1]
-	%31 = icmp eq i32 %30, 0		; <i1> [#uses=1]
-	br i1 %31, label %bb17, label %bb16
-
-bb16:		; preds = %bb10
-	%32 = xor i32 %22, 1		; <i32> [#uses=1]
-	store i32 %32, i32* @disparity0, align 4
-	ret i32 %result.3
-
-bb17:		; preds = %bb10
-	store i32 %22, i32* @disparity0, align 4
-	ret i32 %result.3
-}
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
index 80e7639..951e191 100644
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=linearscan -stats |& grep virtregrewriter | not grep {stores unfolded}
 ; rdar://6682365
 
 ; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
index fcb2ed0..f739216 100644
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 82
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 77
 ; rdar://6802189
 
 ; Test if linearscan is unfavoring registers for allocation to allow more reuse
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 69f644f..620e0f3 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false < %s | \
+; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032
 
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index dd88235..d6ed0c4 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
 ; RUN: grep {leaq.*TLSGD} %t2
-; RUN; grep {__tls_get_addr} %t2
+; RUN: grep {__tls_get_addr} %t2
 ; PR4004
 
 @i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index 7325f4a..f6ac2ba 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -11,7 +11,7 @@
 ; Move return address (76(%esp)) to a temporary register (%ebp)
 ; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]
 ; Overwrite return addresss
-; CHECK: movl %ebx, 76(%esp)
+; CHECK: movl [[EBX:%[a-z]+]], 76(%esp)
 ; Move return address from temporary register (%ebp) to new stack location (60(%esp))
 ; CHECK: movl [[REGISTER]], 60(%esp)
 
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index fa3d5fb..69787c7 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -31,18 +31,19 @@ entry:
         ret void
 }
 
+; CHECK: ti64
 define void @ti64(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <1 x i64>
         %tmp2 = bitcast double %b to <1 x i64>
         %tmp3 = add <1 x i64> %tmp1, %tmp2
-; CHECK:  addq  %rax, %rcx
+; CHECK:  addq
         store <1 x i64> %tmp3, <1 x i64>* null
         ret void
 }
 
 ; MMX intrinsics calls get us MMX instructions.
-
+; CHECK: ti8a
 define void @ti8a(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to x86_mmx
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 323925c..5accfd7 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -22,8 +22,8 @@ while.end:                                        ; preds = %while.cond, %entry
   %conv = zext i32 %v to i64                      ; <i64> [#uses=1]
   %conv14 = zext i32 %div11 to i64                ; <i64> [#uses=1]
 ; Verify that we don't clobber %eax after putting the imulq result in %rax
-; CHECK: imulq	%r{{.}}x, %r[[RES:.]]x
-; CHECK-NOT: movl	{{.*}}, %e[[RES]]x
+; CHECK: imulq	%r{{.}}x, %r[[RES:..]]
+; CHECK-NOT: movl	{{.*}}, %e[[RES]]
 ; CHECK: div
   %mul = mul i64 %conv14, %conv                   ; <i64> [#uses=1]
   %conv16 = zext i32 %div to i64                  ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index f9bda7f..848af82 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -O2 -regalloc=basic < %s | FileCheck %s
 ; Test to check .debug_loc support. This test case emits many debug_loc entries.
 
 ; CHECK: Loc expr size
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 60171eb..6600cc3 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -O2 < %s | FileCheck %s
+; RUN: llc -O2 -regalloc=basic < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin"
+target triple = "x86_64-apple-darwin10"
 
 %struct.a = type { i32, %struct.a* }
 
@@ -55,12 +56,21 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !29 = metadata !{i32 524299, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
 !30 = metadata !{i32 19, i32 0, metadata !29, null}
 
+; The variable bar:myvar changes registers after the first movq.
+; It is cobbered by popq %rbx
+; CHECK: movq
+; CHECK-NEXT: [[LABEL:Ltmp[0-9]*]]
+; CHECK: .loc	1 19 0
+; CHECK: popq
+; CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]]
+
+
 ; CHECK: Ldebug_loc0:
 ; CHECK-NEXT: .quad   Lfunc_begin0
-; CHECK-NEXT: .quad   Ltmp3
+; CHECK-NEXT: .quad   [[LABEL]]
 ; CHECK-NEXT: .short  1
 ; CHECK-NEXT: .byte   85
-; CHECK-NEXT: .quad   Ltmp3
-; CHECK-NEXT: .quad   Ltmp6
+; CHECK-NEXT: .quad   [[LABEL]]
+; CHECK-NEXT: .quad   [[CLOBBER]]
 ; CHECK-NEXT: .short  1
 ; CHECK-NEXT: .byte   83
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index ad8546e..1a0da31 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -1,4 +1,5 @@
 ; RUN: llc  -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc  -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
 ; Test to check separate label for inlined function argument.
 
 define i32 @foo(i32 %y) nounwind optsize ssp {
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index 812d372..a9c03ee 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O2 < %s | FileCheck %s 
+; RUN: llc -O2 < %s | FileCheck %s
+; RUN: llc -O2 -regalloc=basic < %s | FileCheck %s
 ; Test to check that unused argument 'this' is not undefined in debug info.
 
 target triple = "x86_64-apple-darwin10.2"
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index edfd1b8..ba36fe7 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s | grep DW_OP_fbreg
-; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot.
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s | grep DW_OP_breg7
+; Use DW_OP_breg7 in variable's location expression if the variable is in a stack slot.
 
 %struct.SVal = type { i8*, i32 }
 
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index 8fe0309..eaede30 100644
--- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -18,9 +18,9 @@ entry:
   ret i32 0
 }
 
-; CHECK: movq	___stack_chk_guard@GOTPCREL(%rip), %rax
-; CHECK: movb	30(%rsp), %dl
-; CHECK: movb	(%rsp), %sil
-; CHECK: movb	%sil, (%rsp)
-; CHECK: movb	%dl, 30(%rsp)
+; CHECK: movq	___stack_chk_guard@GOTPCREL(%rip)
+; CHECK: movb   38(%rsp), [[R0:%.+]]
+; CHECK: movb   8(%rsp), [[R1:%.+]]
+; CHECK: movb   [[R1]], 8(%rsp)
+; CHECK: movb   [[R0]], 38(%rsp)
 ; CHECK: callq	___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
index cae81d0..73e996c 100644
--- a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -cgp-critical-edge-splitting=0 -mcpu=i386 < %s
+; RUN: llc -verify-machineinstrs -mcpu=i386 < %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 target triple = "i386-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 973975b..7f13411 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=basic | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/2011-02-27-Fpextend.ll b/test/CodeGen/X86/2011-02-27-Fpextend.ll
new file mode 100644
index 0000000..c12b956
--- /dev/null
+++ b/test/CodeGen/X86/2011-02-27-Fpextend.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple=x86_64-pc-linux < %s
+; PR9309
+
+define <4 x double> @f_fu(<4 x float>) nounwind {
+  %float2double.i = fpext <4 x float> %0 to <4 x double>
+  ret <4 x double> %float2double.i
+}
diff --git a/test/CodeGen/X86/2011-03-02-DAGCombiner.ll b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
new file mode 100644
index 0000000..be58ced
--- /dev/null
+++ b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86-64
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+%0 = type { i8, [3 x i8] }
+%struct.anon = type { float, x86_fp80 }
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %F = alloca %struct.anon, align 16
+  %K = alloca %0, align 4
+  store i32 0, i32* %retval
+  %0 = bitcast %0* %K to i32*
+  %1 = load i32* %0, align 4
+  %2 = and i32 %1, -121
+  %3 = or i32 %2, 32
+  store i32 %3, i32* %0, align 4
+  %4 = bitcast %0* %K to i32*
+  %5 = load i32* %4, align 4
+  %6 = lshr i32 %5, 3
+  %bf.clear = and i32 %6, 15
+  %conv = sitofp i32 %bf.clear to float
+  %f = getelementptr inbounds %struct.anon* %F, i32 0, i32 0
+  %tmp = load float* %f, align 4
+  %sub = fsub float %tmp, %conv
+  store float %sub, float* %f, align 4
+  %ld = getelementptr inbounds %struct.anon* %F, i32 0, i32 1
+  %tmp1 = load x86_fp80* %ld, align 16
+  %7 = bitcast %0* %K to i32*
+  %8 = load i32* %7, align 4
+  %9 = lshr i32 %8, 7
+  %bf.clear2 = and i32 %9, 1
+  %conv3 = uitofp i32 %bf.clear2 to x86_fp80
+  %sub4 = fsub x86_fp80 %conv3, %tmp1
+  %conv5 = fptoui x86_fp80 %sub4 to i32
+  %bf.value = and i32 %conv5, 1
+  %10 = bitcast %0* %K to i32*
+  %11 = and i32 %bf.value, 1
+  %12 = shl i32 %11, 7
+  %13 = load i32* %10, align 4
+  %14 = and i32 %13, -129
+  %15 = or i32 %14, %12
+  store i32 %15, i32* %10, align 4
+  %call = call i32 (...)* @iequals(i32 1841, i32 %bf.value, i32 0)
+  %16 = load i32* %retval
+  ret i32 %16
+}
+
+declare i32 @iequals(...)
diff --git a/test/CodeGen/X86/2011-03-08-Sched-crash.ll b/test/CodeGen/X86/2011-03-08-Sched-crash.ll
new file mode 100644
index 0000000..6329ae6
--- /dev/null
+++ b/test/CodeGen/X86/2011-03-08-Sched-crash.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.0.0"
+
+%0 = type { i32, i1 }
+
+declare %0 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+define linkonce_odr hidden void @_ZN2js5QueueINS_7SlotMap8SlotInfoEE6ensureEj(i8* nocapture %this, i32 %size) nounwind align 2 {
+  br i1 undef, label %14, label %1
+
+; <label>:1                                       ; preds = %0
+  br i1 undef, label %2, label %3
+
+; <label>:2                                       ; preds = %1
+  br label %3
+
+; <label>:3                                       ; preds = %2, %1
+  br i1 undef, label %13, label %4
+
+; <label>:4                                       ; preds = %3
+  %5 = tail call %0 @llvm.umul.with.overflow.i32(i32 undef, i32 16)
+  %6 = extractvalue %0 %5, 1
+  %7 = extractvalue %0 %5, 0
+  %.op = add i32 %7, 7
+  %.op.op = and i32 %.op, -8
+  %8 = select i1 %6, i32 0, i32 %.op.op
+  br i1 undef, label %10, label %9
+
+; <label>:9                                       ; preds = %4
+  br label %_ZnamRN7nanojit9AllocatorE.exit
+
+; <label>:10                                      ; preds = %4
+  %11 = tail call i8* @_ZN7nanojit9Allocator9allocSlowEmb(i8* undef, i32 %8, i1 zeroext false) nounwind
+  br label %_ZnamRN7nanojit9AllocatorE.exit
+
+_ZnamRN7nanojit9AllocatorE.exit:                  ; preds = %10, %9
+  br i1 false, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %_ZnamRN7nanojit9AllocatorE.exit
+  br label %12
+
+; <label>:12                                      ; preds = %12, %.lr.ph
+  br i1 undef, label %._crit_edge, label %12
+
+._crit_edge:                                      ; preds = %12, %_ZnamRN7nanojit9AllocatorE.exit
+  br label %14
+
+; <label>:13                                      ; preds = %3
+  br label %14
+
+; <label>:14                                      ; preds = %13, %._crit_edge, %0
+  ret void
+}
+
+declare i8* @_ZN7nanojit9Allocator9allocSlowEmb(i8*, i32, i1 zeroext)
diff --git a/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
new file mode 100644
index 0000000..e48edf7e
--- /dev/null
+++ b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=yonah < %s
+; PR9438
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-unknown-freebsd9.0"
+
+; The 'call fastcc' ties down %ebx, %ecx, and %edx.
+; A MUL8r ties down %al, leaving no GR32_ABCD registers available.
+; The coalescer can easily overallocate physical registers,
+; and register allocation fails.
+
+declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind
+
+define i32 @cvtchar(i8* nocapture %sp) nounwind {
+  %temp.i = alloca [2 x i8], align 1
+  %tmp1 = load i8* %sp, align 1
+  %div = udiv i8 %tmp1, 10
+  %rem = urem i8 %div, 10
+  %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0
+  store i8 %rem, i8* %arrayidx.i, align 1
+  %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll b/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll
new file mode 100644
index 0000000..38a9b3d
--- /dev/null
+++ b/test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86
+
+; rdar://7983260
+
+%struct.T0 = type {}
+
+define void @fn4(%struct.T0* byval %arg0) nounwind ssp {
+entry:
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
new file mode 100644
index 0000000..07b1971
--- /dev/null
+++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=yonah | FileCheck %s
+; Reduced from JavaScriptCore
+
+%"class.JSC::CodeLocationCall" = type { [8 x i8] }
+%"class.JSC::JSGlobalData" = type { [4 x i8] }
+%"class.JSC::FunctionPtr" = type { i8* }
+%"class.JSC::Structure" = type { [4 x i8] }
+%"class.JSC::UString" = type { i8* }
+%"class.JSC::JSString" = type { [16 x i8], i32, %"class.JSC::UString", i32 }
+
+declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* nocapture, i8*, %"class.JSC::FunctionPtr"* nocapture) nounwind noinline ssp
+
+; Avoid hoisting the test above loads or copies
+; CHECK: %entry
+; CHECK: cmpq
+; CHECK-NOT: mov
+; CHECK: jb
+define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
+entry:
+  %0 = load i8** null, align 8
+  %tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"*
+  %tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*)
+  %or.cond.i = and i1 %tobool.i.i.i, undef
+  br i1 %or.cond.i, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %entry
+  br i1 undef, label %if.then.i.i.i, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
+
+if.then.i.i.i:                                    ; preds = %if.then.i
+  %conv.i.i.i.i = trunc i64 undef to i32
+  br label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
+
+if.end.i:                                         ; preds = %entry
+  br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i
+
+land.rhs.i121.i:                                  ; preds = %if.end.i
+  %tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8
+  br label %_ZNK3JSC7JSValue8isStringEv.exit122.i
+
+_ZNK3JSC7JSValue8isStringEv.exit122.i:            ; preds = %land.rhs.i121.i, %if.end.i
+  %brmerge.i = or i1 undef, false
+  %or.cond = or i1 false, %brmerge.i
+  br i1 %or.cond, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit, label %if.then.i92.i
+
+if.then.i92.i:                                    ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i
+  tail call void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"* undef, %"class.JSC::CodeLocationCall"* %tmp13) nounwind
+  unreachable
+
+_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i
+
+  %1 = load i8** undef, align 8
+  br i1 undef, label %do.end39, label %do.body27
+
+do.body27:                                        ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
+  %tmp30 = bitcast i8* %1 to %"class.JSC::JSGlobalData"*
+  %2 = getelementptr inbounds i8** %args, i64 -1
+  %3 = bitcast i8** %2 to %"class.JSC::FunctionPtr"*
+  tail call fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* %tmp30, i8* undef, %"class.JSC::FunctionPtr"* %3)
+  unreachable
+
+do.end39:                                         ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
+  ret i32 undef
+}
+
+declare void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"*, %"class.JSC::CodeLocationCall"*)
diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll
new file mode 100644
index 0000000..0b27bf2
--- /dev/null
+++ b/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -0,0 +1,297 @@
+; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s
+
+define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pavgusb
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
+  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
+  %2 = bitcast <8 x i8> %0 to x86_mmx
+  %3 = bitcast <8 x i8> %1 to x86_mmx
+  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
+  %5 = bitcast x86_mmx %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
+; CHECK: pf2id
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfadd
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpeq
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpge
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpgt
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmax
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmin
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmul
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
+; CHECK: pfrcp
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit1
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit2
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
+; CHECK: pfrsqrt
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrsqit1
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsub
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsubr
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fd
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
+
+define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pmulhrw
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
+  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
+  %2 = bitcast <4 x i16> %0 to x86_mmx
+  %3 = bitcast <4 x i16> %1 to x86_mmx
+  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  ret <4 x i16> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
+; CHECK: pf2iw
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfnacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfpnacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fw
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+  %0 = bitcast <2 x i32> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
index 728e377..3758fd8 100644
--- a/test/CodeGen/X86/MachineSink-PHIUse.ll
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink}
+; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats |& grep {machine-sink}
 
 define fastcc void @t() nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/SIMD/dg.exp b/test/CodeGen/X86/SIMD/dg.exp
new file mode 100644
index 0000000..629a147
--- /dev/null
+++ b/test/CodeGen/X86/SIMD/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/X86/SIMD/notvunpcklpd.ll b/test/CodeGen/X86/SIMD/notvunpcklpd.ll
new file mode 100644
index 0000000..3afc2f2
--- /dev/null
+++ b/test/CodeGen/X86/SIMD/notvunpcklpd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mattr=+avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
+entry:
+	%incarray1 = alloca [2 x <4 x double>]*, align 8
+	%incarrayb1 = alloca [2 x <4 x double>]*, align 8
+	%carray = alloca [2 x <4 x double>], align 16
+	%r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
+	%rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
+	%r3 = load <4 x double>* %r, align 8
+	%r4 = load <4 x double>* %rb, align 8
+	%r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x double>> [#uses=1]
+; CHECK-NOT: vunpcklpd
+	%r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
+	store <4 x double> %r11, <4 x double>* %r12, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/SIMD/notvunpcklps.ll b/test/CodeGen/X86/SIMD/notvunpcklps.ll
new file mode 100644
index 0000000..19daa3e
--- /dev/null
+++ b/test/CodeGen/X86/SIMD/notvunpcklps.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mattr=+avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
+enmtry:
+	%incarray1 = alloca [2 x <8 x float>]*, align 8
+	%incarrayb1 = alloca [2 x <8 x float>]*, align 8
+	%carray = alloca [2 x <8 x float>], align 16
+	%r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
+	%rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
+	%r3 = load <8 x float>* %r, align 8
+	%r4 = load <8 x float>* %rb, align 8
+	%r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x float>> [#uses=1]
+; CHECK-NOT: vunpcklps
+	%r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0
+	store <8 x float> %r8, <8 x float>* %r9, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/SIMD/vunpcklpd.ll b/test/CodeGen/X86/SIMD/vunpcklpd.ll
new file mode 100644
index 0000000..60d23a4
--- /dev/null
+++ b/test/CodeGen/X86/SIMD/vunpcklpd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mattr=+avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
+entry:
+	%incarray1 = alloca [2 x <4 x double>]*, align 8
+	%incarrayb1 = alloca [2 x <4 x double>]*, align 8
+	%carray = alloca [2 x <4 x double>], align 16
+	%r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
+	%rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
+	%r3 = load <4 x double>* %r, align 8
+	%r4 = load <4 x double>* %rb, align 8
+	%r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 2, i32 6 >		; <<4 x double>> [#uses=1]
+; CHECK: vunpcklpd
+	%r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
+	store <4 x double> %r11, <4 x double>* %r12, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/SIMD/vunpcklps.ll b/test/CodeGen/X86/SIMD/vunpcklps.ll
new file mode 100644
index 0000000..a87b299
--- /dev/null
+++ b/test/CodeGen/X86/SIMD/vunpcklps.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mattr=+avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
+entry:
+	%incarray1 = alloca [2 x <8 x float>]*, align 8
+	%incarrayb1 = alloca [2 x <8 x float>]*, align 8
+	%carray = alloca [2 x <8 x float>], align 16
+	%r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
+	%rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
+	%r3 = load <8 x float>* %r, align 8
+	%r4 = load <8 x float>* %rb, align 8
+	%r11 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13 >		; <<8 x float>> [#uses=1]
+; CHECK: vunpcklps
+	%r12 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 1
+	store <8 x float> %r11, <8 x float>* %r12, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5b4d79f..7535e07 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -12,6 +12,17 @@
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
 ; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -regalloc=basic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
 @xsrc = external global [32 x i32]
@@ -38,68 +49,68 @@ entry:
 	ret void
 
 ; LINUX-64-STATIC: foo00:
-; LINUX-64-STATIC: movl	src(%rip), %eax
-; LINUX-64-STATIC: movl	%eax, dst
+; LINUX-64-STATIC: movl	src(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl	[[EAX]], dst
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo00:
-; LINUX-32-STATIC: 	movl	src, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dst
+; LINUX-32-STATIC: 	movl	src, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo00:
-; LINUX-32-PIC: 	movl	src, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dst
+; LINUX-32-PIC: 	movl	src, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r..]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e..]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r..]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo00:
-; DARWIN-32-STATIC: 	movl	_src, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst
+; DARWIN-32-STATIC: 	movl	_src, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dst
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo00:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e..]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e..]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e..]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo00:
 ; DARWIN-32-PIC: 	calll	L0$pb
 ; DARWIN-32-PIC-NEXT: L0$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L0$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L0$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L0$pb([[EAX]]), [[ECX:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	([[ECX]]), [[ECX:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L0$pb([[EAX]]), [[EAX:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r..]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]]), [[EAX:%e..]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r..]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r..]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]]), [[EAX:%e..]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r..]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r..]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e..]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r..]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -110,68 +121,68 @@ entry:
 	ret void
 
 ; LINUX-64-STATIC: fxo00:
-; LINUX-64-STATIC: movl	xsrc(%rip), %eax
-; LINUX-64-STATIC: movl	%eax, xdst
+; LINUX-64-STATIC: movl	xsrc(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl	[[EAX]], xdst
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: fxo00:
-; LINUX-32-STATIC: 	movl	xsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-STATIC: 	movl	xsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], xdst
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: fxo00:
-; LINUX-32-PIC: 	movl	xsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-PIC: 	movl	xsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], xdst
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: fxo00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _fxo00:
-; DARWIN-32-STATIC: 	movl	_xsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst
+; DARWIN-32-STATIC: 	movl	_xsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _xdst
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _fxo00:
-; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo00:
 ; DARWIN-32-PIC: 	calll	L1$pb
 ; DARWIN-32-PIC-NEXT: L1$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L1$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L1$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L1$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _fxo00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _fxo00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _fxo00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -192,9 +203,9 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo01:
@@ -202,36 +213,36 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo01:
-; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo01:
 ; DARWIN-32-PIC: 	calll	L2$pb
 ; DARWIN-32-PIC-NEXT: L2$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L2$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L2$pb(
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L2$pb(
+; DARWIN-32-PIC-NEXT: 	movl
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -252,9 +263,9 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: fxo01:
-; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _fxo01:
@@ -262,36 +273,36 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _fxo01:
-; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo01:
 ; DARWIN-32-PIC: 	calll	L3$pb
 ; DARWIN-32-PIC-NEXT: L3$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[R0:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L3$pb([[R0]]), [[R1:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L3$pb([[R0]]), [[R2:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	[[R1:%e..]], ([[R2]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _fxo01:
-; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _fxo01:
-; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _fxo01:
-; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -308,72 +319,72 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo02:
-; LINUX-32-STATIC: 	movl	src, %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC: 	movl	src, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo02:
-; LINUX-32-PIC: 	movl	src, %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC: 	movl	src, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo02:
-; DARWIN-32-STATIC: 	movl	_src, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC: 	movl	_src, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo02:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo02:
 ; DARWIN-32-PIC: 	calll	L4$pb
 ; DARWIN-32-PIC-NEXT: L4$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L4$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[R0:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L4$pb([[R0]]), [[R1:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	([[R1]]), [[R2:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L4$pb([[R0]]), [[R3:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	([[R3]]), [[R4:%e..]]
+; DARWIN-32-PIC-NEXT: 	movl	[[R2]], ([[R4]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -389,73 +400,73 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: fxo02:
-; LINUX-32-STATIC: 	movl	xsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC: 	movl	xsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
 ; LINUX-32-PIC: fxo02:
-; LINUX-32-PIC: 	movl	xsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC: 	movl	xsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: fxo02:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _fxo02:
-; DARWIN-32-STATIC: 	movl	_xsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC: 	movl	_xsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _fxo02:
-; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo02:
 ; DARWIN-32-PIC: 	calll	L5$pb
 ; DARWIN-32-PIC-NEXT: L5$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L5$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L5$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _fxo02:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _fxo02:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _fxo02:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -465,58 +476,58 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
 	ret void
 ; LINUX-64-STATIC: foo03:
-; LINUX-64-STATIC: movl    dsrc(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ddst
+; LINUX-64-STATIC: movl    dsrc(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo03:
-; LINUX-32-STATIC: 	movl	dsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-STATIC: 	movl	dsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo03:
-; LINUX-32-PIC: 	movl	dsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-PIC: 	movl	dsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo03:
-; DARWIN-32-STATIC: 	movl	_dsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-STATIC: 	movl	_dsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ddst
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo03:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo03:
 ; DARWIN-32-PIC: 	calll	L6$pb
 ; DARWIN-32-PIC-NEXT: L6$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L6$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ddst-L6$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L6$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _ddst-L6$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo03:
-; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ddst(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo03:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo03:
-; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ddst(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -537,9 +548,9 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo04:
@@ -553,24 +564,24 @@ entry:
 ; DARWIN-32-PIC: _foo04:
 ; DARWIN-32-PIC: 	calll	L7$pb
 ; DARWIN-32-PIC-NEXT: L7$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ddst-L7$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L7$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L7$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -587,62 +598,62 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo05:
-; LINUX-32-STATIC: 	movl	dsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC: 	movl	dsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo05:
-; LINUX-32-PIC: 	movl	dsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC: 	movl	dsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo05:
-; DARWIN-32-STATIC: 	movl	_dsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC: 	movl	_dsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo05:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo05:
 ; DARWIN-32-PIC: 	calll	L8$pb
 ; DARWIN-32-PIC-NEXT: L8$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L8$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L8$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L8$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L8$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo05:
-; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo05:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo05:
-; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -652,56 +663,56 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
 	ret void
 ; LINUX-64-STATIC: foo06:
-; LINUX-64-STATIC: movl    lsrc(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ldst(%rip)
+; LINUX-64-STATIC: movl    lsrc(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo06:
-; LINUX-32-STATIC: 	movl	lsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-STATIC: 	movl	lsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo06:
-; LINUX-32-PIC: 	movl	lsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-PIC: 	movl	lsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo06:
-; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movl	%eax, ldst(%rip)
+; LINUX-64-PIC: 	movl	lsrc(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo06:
-; DARWIN-32-STATIC: 	movl	_lsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-STATIC: 	movl	_lsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ldst
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo06:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo06:
 ; DARWIN-32-PIC: 	calll	L9$pb
 ; DARWIN-32-PIC-NEXT: L9$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L9$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ldst-L9$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L9$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _ldst-L9$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo06:
-; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ldst(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo06:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo06:
-; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ldst(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -722,8 +733,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo07:
@@ -737,24 +748,24 @@ entry:
 ; DARWIN-32-PIC: _foo07:
 ; DARWIN-32-PIC: 	calll	L10$pb
 ; DARWIN-32-PIC-NEXT: L10$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ldst-L10$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L10$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L10$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -771,60 +782,60 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: foo08:
-; LINUX-32-STATIC: 	movl	lsrc, %eax
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC: 	movl	lsrc, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: foo08:
-; LINUX-32-PIC: 	movl	lsrc, %eax
-; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC: 	movl	lsrc, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: foo08:
-; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC: 	movl	lsrc(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _foo08:
-; DARWIN-32-STATIC: 	movl	_lsrc, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC: 	movl	_lsrc, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _foo08:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo08:
 ; DARWIN-32-PIC: 	calll	L11$pb
 ; DARWIN-32-PIC-NEXT: L11$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L11$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L11$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L11$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L11$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _foo08:
-; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _foo08:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _foo08:
-; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -834,68 +845,68 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC: qux00:
-; LINUX-64-STATIC: movl    src+64(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, dst+64(%rip)
+; LINUX-64-STATIC: movl    src+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], dst+64(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux00:
-; LINUX-32-STATIC: 	movl	src+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-STATIC: 	movl	src+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: qux00:
-; LINUX-32-PIC: 	movl	src+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-PIC: 	movl	src+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux00:
-; DARWIN-32-STATIC: 	movl	_src+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+64
+; DARWIN-32-STATIC: 	movl	_src+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dst+64
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux00:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux00:
 ; DARWIN-32-PIC: 	calll	L12$pb
 ; DARWIN-32-PIC-NEXT: L12$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L12$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L12$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L12$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L12$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -905,68 +916,68 @@ entry:
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC: qxx00:
-; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, xdst+64(%rip)
+; LINUX-64-STATIC: movl    xsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], xdst+64(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qxx00:
-; LINUX-32-STATIC: 	movl	xsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-STATIC: 	movl	xsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], xdst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: qxx00:
-; LINUX-32-PIC: 	movl	xsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-PIC: 	movl	xsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], xdst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qxx00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qxx00:
-; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst+64
+; DARWIN-32-STATIC: 	movl	_xsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _xdst+64
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qxx00:
-; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx00:
 ; DARWIN-32-PIC: 	calll	L13$pb
 ; DARWIN-32-PIC-NEXT: L13$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L13$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L13$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L13$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qxx00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qxx00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qxx00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -987,10 +998,10 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	addq	$64, %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux01:
@@ -998,41 +1009,41 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux01:
-; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux01:
 ; DARWIN-32-PIC: 	calll	L14$pb
 ; DARWIN-32-PIC-NEXT: L14$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L14$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L14$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	addl	$64, [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L14$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1053,10 +1064,10 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qxx01:
-; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	addq	$64, %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qxx01:
@@ -1064,41 +1075,41 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qxx01:
-; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx01:
 ; DARWIN-32-PIC: 	calll	L15$pb
 ; DARWIN-32-PIC-NEXT: L15$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L15$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	addl	$64, [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L15$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qxx01:
-; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qxx01:
-; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qxx01:
-; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1109,79 +1120,79 @@ entry:
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC: qux02:
-; LINUX-64-STATIC: movl    src+64(%rip), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: movl    src+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux02:
-; LINUX-32-STATIC: 	movl	src+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC: 	movl	src+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
 ; LINUX-32-PIC: qux02:
-; LINUX-32-PIC: 	movl	src+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC: 	movl	src+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux02:
-; DARWIN-32-STATIC: 	movl	_src+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC: 	movl	_src+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux02:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux02:
 ; DARWIN-32-PIC: 	calll	L16$pb
 ; DARWIN-32-PIC-NEXT: L16$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L16$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L16$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L16$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1192,79 +1203,79 @@ entry:
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC: qxx02:
-; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: movl    xsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qxx02:
-; LINUX-32-STATIC: 	movl	xsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC: 	movl	xsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
 ; LINUX-32-PIC: qxx02:
-; LINUX-32-PIC: 	movl	xsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC: 	movl	xsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qxx02:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qxx02:
-; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC: 	movl	_xsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qxx02:
-; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx02:
 ; DARWIN-32-PIC: 	calll	L17$pb
 ; DARWIN-32-PIC-NEXT: L17$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L17$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L17$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qxx02:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qxx02:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qxx02:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1274,58 +1285,58 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
 	ret void
 ; LINUX-64-STATIC: qux03:
-; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ddst+64(%rip)
+; LINUX-64-STATIC: movl    dsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst+64(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux03:
-; LINUX-32-STATIC: 	movl	dsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-STATIC: 	movl	dsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: qux03:
-; LINUX-32-PIC: 	movl	dsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-PIC: 	movl	dsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux03:
-; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-STATIC: 	movl	_dsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ddst+64
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux03:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst+64
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux03:
 ; DARWIN-32-PIC: 	calll	L18$pb
 ; DARWIN-32-PIC-NEXT: L18$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L18$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L18$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L18$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], (_ddst-L18$pb)+64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux03:
-; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ddst+64(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux03:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst+64(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux03:
-; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ddst+64(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1346,10 +1357,10 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	addq	$64, %rax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux04:
@@ -1363,24 +1374,24 @@ entry:
 ; DARWIN-32-PIC: _qux04:
 ; DARWIN-32-PIC: 	calll	L19$pb
 ; DARWIN-32-PIC-NEXT: L19$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L19$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L19$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L19$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L19$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux04:
-; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux04:
-; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1391,69 +1402,69 @@ entry:
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC: qux05:
-; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
-; LINUX-64-STATIC: movq    dptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: movl    dsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux05:
-; LINUX-32-STATIC: 	movl	dsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC: 	movl	dsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
 ; LINUX-32-PIC: qux05:
-; LINUX-32-PIC: 	movl	dsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC: 	movl	dsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux05:
-; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC: 	movl	_dsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux05:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux05:
 ; DARWIN-32-PIC: 	calll	L20$pb
 ; DARWIN-32-PIC-NEXT: L20$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L20$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L20$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L20$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L20$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux05:
-; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux05:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux05:
-; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1463,56 +1474,56 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC: qux06:
-; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ldst+64
+; LINUX-64-STATIC: movl    lsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst+64
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux06:
-; LINUX-32-STATIC: 	movl	lsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-STATIC: 	movl	lsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: qux06:
-; LINUX-32-PIC: 	movl	lsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-PIC: 	movl	lsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux06:
-; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+64(%rip)
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst+64(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux06:
-; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-STATIC: 	movl	_lsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ldst+64
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux06:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst+64
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux06:
 ; DARWIN-32-PIC: 	calll	L21$pb
 ; DARWIN-32-PIC-NEXT: L21$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L21$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L21$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L21$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], (_ldst-L21$pb)+64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux06:
-; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ldst+64(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux06:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst+64(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux06:
-; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ldst+64(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1533,8 +1544,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux07:
-; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst+64(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux07:
@@ -1548,24 +1559,24 @@ entry:
 ; DARWIN-32-PIC: _qux07:
 ; DARWIN-32-PIC: 	calll	L22$pb
 ; DARWIN-32-PIC-NEXT: L22$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L22$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L22$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L22$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L22$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux07:
-; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux07:
-; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1576,67 +1587,67 @@ entry:
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC: qux08:
-; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
-; LINUX-64-STATIC: movq    lptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: movl    lsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: qux08:
-; LINUX-32-STATIC: 	movl	lsrc+64, %eax
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC: 	movl	lsrc+64, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
 ; LINUX-32-PIC: qux08:
-; LINUX-32-PIC: 	movl	lsrc+64, %eax
-; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC: 	movl	lsrc+64, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: qux08:
-; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _qux08:
-; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC: 	movl	_lsrc+64, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _qux08:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux08:
 ; DARWIN-32-PIC: 	calll	L23$pb
 ; DARWIN-32-PIC-NEXT: L23$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L23$pb)+64(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L23$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L23$pb)+64([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L23$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 64([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _qux08:
-; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _qux08:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _qux08:
-; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1648,73 +1659,73 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: ind00:
-; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, dst(,%rdi,4)
+; LINUX-64-STATIC: movl    src(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], dst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _dst(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind00:
 ; DARWIN-32-PIC: 	calll	L24$pb
 ; DARWIN-32-PIC-NEXT: L24$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L24$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L24$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L24$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L24$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1726,73 +1737,73 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: ixd00:
-; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, xdst(,%rdi,4)
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], xdst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ixd00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], xdst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ixd00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], xdst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ixd00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ixd00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _xdst(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ixd00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd00:
 ; DARWIN-32-PIC: 	calll	L25$pb
 ; DARWIN-32-PIC-NEXT: L25$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L25$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L25$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L25$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L25$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ixd00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ixd00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ixd00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1802,73 +1813,73 @@ entry:
 	store i32* %0, i32** @ptr, align 8
 	ret void
 ; LINUX-64-STATIC: ind01:
-; LINUX-64-STATIC: leaq    dst(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: leaq    dst(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dst(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dst(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind01:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rdi
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dst(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind01:
 ; DARWIN-32-PIC: 	calll	L26$pb
 ; DARWIN-32-PIC-NEXT: L26$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
-; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L26$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L26$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	shll	$2, [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L26$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L26$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind01:
 ; DARWIN-64-STATIC: 	shlq	$2, %rdi
 ; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind01:
 ; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
 ; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind01:
 ; DARWIN-64-PIC: 	shlq	$2, %rdi
 ; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1878,73 +1889,73 @@ entry:
 	store i32* %0, i32** @ptr, align 8
 	ret void
 ; LINUX-64-STATIC: ixd01:
-; LINUX-64-STATIC: leaq    xdst(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: leaq    xdst(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ixd01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xdst(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	xdst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ixd01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xdst(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xdst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ixd01:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rdi
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ixd01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xdst(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ixd01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd01:
 ; DARWIN-32-PIC: 	calll	L27$pb
 ; DARWIN-32-PIC-NEXT: L27$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
-; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L27$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	shll	$2, [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L27$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L27$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ixd01:
 ; DARWIN-64-STATIC: 	shlq	$2, %rdi
 ; DARWIN-64-STATIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ixd01:
 ; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
 ; DARWIN-64-DYNAMIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ixd01:
 ; DARWIN-64-PIC: 	shlq	$2, %rdi
 ; DARWIN-64-PIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -1957,83 +1968,83 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: ind02:
-; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    src(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind02:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EDX]]), [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind02:
 ; DARWIN-32-PIC: 	calll	L28$pb
 ; DARWIN-32-PIC-NEXT: L28$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L28$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L28$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L28$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2046,83 +2057,83 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: ixd02:
-; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ixd02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ixd02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ixd02:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ixd02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ixd02:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EDX]]), [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd02:
 ; DARWIN-32-PIC: 	calll	L29$pb
 ; DARWIN-32-PIC-NEXT: L29$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L29$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L29$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L29$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ixd02:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ixd02:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ixd02:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2134,69 +2145,69 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: ind03:
-; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ddst(,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind03:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind03:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind03:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ddst(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind03:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ddst(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind03:
 ; DARWIN-32-PIC: 	calll	L30$pb
 ; DARWIN-32-PIC-NEXT: L30$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L30$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, _ddst-L30$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L30$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], _ddst-L30$pb([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind03:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind03:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind03:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2206,66 +2217,66 @@ entry:
 	store i32* %0, i32** @dptr, align 8
 	ret void
 ; LINUX-64-STATIC: ind04:
-; LINUX-64-STATIC: leaq    ddst(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: leaq    ddst(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind04:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ddst(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ddst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind04:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ddst(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ddst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind04:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rdi
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	%rdi, ([[RAX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind04:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ddst(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind04:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind04:
 ; DARWIN-32-PIC: 	calll	L31$pb
 ; DARWIN-32-PIC-NEXT: L31$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	_ddst-L31$pb(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L31$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L31$pb([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L31$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2278,76 +2289,76 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: ind05:
-; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    dptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind05:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind05:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind05:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind05:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind05:
 ; DARWIN-32-PIC: 	calll	L32$pb
 ; DARWIN-32-PIC-NEXT: L32$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L32$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L32$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L32$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L32$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind05:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind05:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind05:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2359,69 +2370,69 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: ind06:
-; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ldst(,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind06:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind06:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind06:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind06:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ldst(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind06:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ldst(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind06:
 ; DARWIN-32-PIC: 	calll	L33$pb
 ; DARWIN-32-PIC-NEXT: L33$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L33$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, _ldst-L33$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L33$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], _ldst-L33$pb([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind06:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind06:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind06:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2431,65 +2442,65 @@ entry:
 	store i32* %0, i32** @lptr, align 8
 	ret void
 ; LINUX-64-STATIC: ind07:
-; LINUX-64-STATIC: leaq    ldst(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: leaq    ldst(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind07:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ldst(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ldst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind07:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ldst(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ldst(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind07:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ldst(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind07:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind07:
 ; DARWIN-32-PIC: 	calll	L34$pb
 ; DARWIN-32-PIC-NEXT: L34$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	_ldst-L34$pb(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L34$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L34$pb([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L34$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2502,75 +2513,75 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: ind08:
-; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    lptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ind08:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ind08:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ind08:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ind08:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ind08:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind08:
 ; DARWIN-32-PIC: 	calll	L35$pb
 ; DARWIN-32-PIC-NEXT: L35$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L35$pb(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L35$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L35$pb([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L35$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], ([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ind08:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ind08:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ind08:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2583,73 +2594,73 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: off00:
-; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, dst+64(,%rdi,4)
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], dst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+64(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _dst+64(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off00:
 ; DARWIN-32-PIC: 	calll	L36$pb
 ; DARWIN-32-PIC-NEXT: L36$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L36$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L36$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L36$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L36$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2662,73 +2673,73 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: oxf00:
-; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, xdst+64(,%rdi,4)
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], xdst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: oxf00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], xdst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: oxf00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], xdst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: oxf00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _oxf00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst+64(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _xdst+64(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _oxf00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf00:
 ; DARWIN-32-PIC: 	calll	L37$pb
 ; DARWIN-32-PIC-NEXT: L37$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L37$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L37$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L37$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L37$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _oxf00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _oxf00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _oxf00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2739,73 +2750,73 @@ entry:
 	store i32* %0, i32** @ptr, align 8
 	ret void
 ; LINUX-64-STATIC: off01:
-; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off01:
 ; DARWIN-32-PIC: 	calll	L38$pb
 ; DARWIN-32-PIC-NEXT: L38$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L38$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L38$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L38$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EDX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L38$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2816,73 +2827,73 @@ entry:
 	store i32* %0, i32** @ptr, align 8
 	ret void
 ; LINUX-64-STATIC: oxf01:
-; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: oxf01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: oxf01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: oxf01:
-; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _oxf01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _oxf01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf01:
 ; DARWIN-32-PIC: 	calll	L39$pb
 ; DARWIN-32-PIC-NEXT: L39$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L39$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L39$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EDX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L39$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _oxf01:
-; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _oxf01:
-; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _oxf01:
-; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2896,83 +2907,83 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: off02:
-; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off02:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EDX]]), [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off02:
 ; DARWIN-32-PIC: 	calll	L40$pb
 ; DARWIN-32-PIC-NEXT: L40$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L40$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L40$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L40$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -2986,83 +2997,83 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: oxf02:
-; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: oxf02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: oxf02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: oxf02:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _oxf02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _oxf02:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EDX]]), [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf02:
 ; DARWIN-32-PIC: 	calll	L41$pb
 ; DARWIN-32-PIC-NEXT: L41$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L41$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L41$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	64([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L41$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _oxf02:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _oxf02:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _oxf02:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3075,69 +3086,69 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: off03:
-; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ddst+64(,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off03:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off03:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off03:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ddst+64(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off03:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ddst+64(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off03:
 ; DARWIN-32-PIC: 	calll	L42$pb
 ; DARWIN-32-PIC-NEXT: L42$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L42$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L42$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L42$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], (_ddst-L42$pb)+64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off03:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off03:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off03:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3148,66 +3159,66 @@ entry:
 	store i32* %0, i32** @dptr, align 8
 	ret void
 ; LINUX-64-STATIC: off04:
-; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off04:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off04:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off04:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off04:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off04:
 ; DARWIN-32-PIC: 	calll	L43$pb
 ; DARWIN-32-PIC-NEXT: L43$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L43$pb)+64(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L43$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L43$pb)+64([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L43$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3221,76 +3232,76 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: off05:
-; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    dptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off05:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off05:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off05:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off05:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off05:
 ; DARWIN-32-PIC: 	calll	L44$pb
 ; DARWIN-32-PIC-NEXT: L44$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L44$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L44$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L44$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L44$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off05:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off05:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off05:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3303,69 +3314,69 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: off06:
-; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ldst+64(,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off06:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off06:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off06:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off06:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ldst+64(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off06:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ldst+64(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off06:
 ; DARWIN-32-PIC: 	calll	L45$pb
 ; DARWIN-32-PIC-NEXT: L45$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L45$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L45$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L45$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], (_ldst-L45$pb)+64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off06:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off06:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off06:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3376,65 +3387,65 @@ entry:
 	store i32* %0, i32** @lptr, align 8
 	ret void
 ; LINUX-64-STATIC: off07:
-; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off07:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off07:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off07:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off07:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off07:
 ; DARWIN-32-PIC: 	calll	L46$pb
 ; DARWIN-32-PIC-NEXT: L46$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L46$pb)+64(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L46$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L46$pb)+64([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L46$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3448,75 +3459,75 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: off08:
-; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    lptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: off08:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: off08:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: off08:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _off08:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _off08:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off08:
 ; DARWIN-32-PIC: 	calll	L47$pb
 ; DARWIN-32-PIC-NEXT: L47$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L47$pb)+64(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L47$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L47$pb)+64([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L47$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 64([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _off08:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _off08:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _off08:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3526,68 +3537,68 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
 	ret void
 ; LINUX-64-STATIC: moo00:
-; LINUX-64-STATIC: movl    src+262144(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, dst+262144(%rip)
+; LINUX-64-STATIC: movl    src+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], dst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo00:
-; LINUX-32-STATIC: 	movl	src+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-STATIC: 	movl	src+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo00:
-; LINUX-32-PIC: 	movl	src+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-PIC: 	movl	src+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo00:
-; DARWIN-32-STATIC: 	movl	_src+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+262144
+; DARWIN-32-STATIC: 	movl	_src+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dst+262144
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo00:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo00:
 ; DARWIN-32-PIC: 	calll	L48$pb
 ; DARWIN-32-PIC-NEXT: L48$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L48$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L48$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L48$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	262144([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L48$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3608,10 +3619,10 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo01:
-; LINUX-64-PIC: 	movl	$262144, %eax
-; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movl	$262144, [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo01:
@@ -3619,41 +3630,41 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo01:
-; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	$262144, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo01:
 ; DARWIN-32-PIC: 	calll	L49$pb
 ; DARWIN-32-PIC-NEXT: L49$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	$262144, %ecx
-; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L49$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L49$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	$262144, [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L49$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L49$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo01:
-; DARWIN-64-STATIC: 	movl	$262144, %eax
-; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movl	$262144, [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo01:
-; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
-; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movl	$262144, [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo01:
-; DARWIN-64-PIC: 	movl	$262144, %eax
-; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movl	$262144, [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3665,78 +3676,78 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: moo02:
-; LINUX-64-STATIC: movl    src+262144(%rip), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: movl    src+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo02:
-; LINUX-32-STATIC: 	movl	src+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC: 	movl	src+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo02:
-; LINUX-32-PIC: 	movl	src+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC: 	movl	src+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo02:
-; DARWIN-32-STATIC: 	movl	_src+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC: 	movl	_src+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo02:
-; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo02:
 ; DARWIN-32-PIC: 	calll	L50$pb
 ; DARWIN-32-PIC-NEXT: L50$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L50$pb(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L50$pb([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	262144([[ECX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L50$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3746,58 +3757,58 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
 	ret void
 ; LINUX-64-STATIC: moo03:
-; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ddst+262144(%rip)
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo03:
-; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-STATIC: 	movl	dsrc+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo03:
-; LINUX-32-PIC: 	movl	dsrc+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-PIC: 	movl	dsrc+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo03:
-; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ddst+262144
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo03:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst+262144
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo03:
 ; DARWIN-32-PIC: 	calll	L51$pb
 ; DARWIN-32-PIC-NEXT: L51$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L51$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L51$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L51$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], (_ddst-L51$pb)+262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo03:
-; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ddst+262144(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo03:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ddst+262144(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo03:
-; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ddst+262144(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3818,10 +3829,10 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo04:
-; LINUX-64-PIC: 	movl	$262144, %eax
-; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movl	$262144, [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo04:
@@ -3835,24 +3846,24 @@ entry:
 ; DARWIN-32-PIC: _moo04:
 ; DARWIN-32-PIC: 	calll	L52$pb
 ; DARWIN-32-PIC-NEXT: L52$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L52$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L52$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L52$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L52$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo04:
-; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo04:
-; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3864,68 +3875,68 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: moo05:
-; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
-; LINUX-64-STATIC: movq    dptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo05:
-; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC: 	movl	dsrc+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo05:
-; LINUX-32-PIC: 	movl	dsrc+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC: 	movl	dsrc+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo05:
-; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo05:
-; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo05:
 ; DARWIN-32-PIC: 	calll	L53$pb
 ; DARWIN-32-PIC-NEXT: L53$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L53$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L53$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L53$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L53$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo05:
-; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo05:
-; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo05:
-; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -3935,56 +3946,56 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
 	ret void
 ; LINUX-64-STATIC: moo06:
-; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
-; LINUX-64-STATIC: movl    %eax, ldst+262144(%rip)
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo06:
-; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-STATIC: 	movl	lsrc+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo06:
-; LINUX-32-PIC: 	movl	lsrc+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-PIC: 	movl	lsrc+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo06:
-; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+262144(%rip)
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst+262144(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo06:
-; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ldst+262144
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo06:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst+262144
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo06:
 ; DARWIN-32-PIC: 	calll	L54$pb
 ; DARWIN-32-PIC-NEXT: L54$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L54$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L54$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L54$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], (_ldst-L54$pb)+262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo06:
-; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], _ldst+262144(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo06:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], _ldst+262144(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo06:
-; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], _ldst+262144(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4005,8 +4016,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo07:
-; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst+262144(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo07:
@@ -4020,24 +4031,24 @@ entry:
 ; DARWIN-32-PIC: _moo07:
 ; DARWIN-32-PIC: 	calll	L55$pb
 ; DARWIN-32-PIC-NEXT: L55$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L55$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L55$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L55$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L55$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo07:
-; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo07:
-; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4049,66 +4060,66 @@ entry:
 	store i32 %1, i32* %2, align 4
 	ret void
 ; LINUX-64-STATIC: moo08:
-; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
-; LINUX-64-STATIC: movq    lptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: moo08:
-; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC: 	movl	lsrc+262144, [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: moo08:
-; LINUX-32-PIC: 	movl	lsrc+262144, %eax
-; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC: 	movl	lsrc+262144, [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: moo08:
-; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _moo08:
-; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _moo08:
-; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo08:
 ; DARWIN-32-PIC: 	calll	L56$pb
 ; DARWIN-32-PIC-NEXT: L56$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L56$pb)+262144(%eax), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L56$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L56$pb)+262144([[EAX]]), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L56$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _moo08:
-; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _moo08:
-; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _moo08:
-; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4121,73 +4132,73 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: big00:
-; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, dst+262144(,%rdi,4)
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], dst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+262144(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _dst+262144(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big00:
 ; DARWIN-32-PIC: 	calll	L57$pb
 ; DARWIN-32-PIC-NEXT: L57$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L57$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L57$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L57$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	262144([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L57$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4198,73 +4209,73 @@ entry:
 	store i32* %0, i32** @ptr, align 8
 	ret void
 ; LINUX-64-STATIC: big01:
-; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, ptr(%rip)
+; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], ptr(%rip)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _ptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big01:
 ; DARWIN-32-PIC: 	calll	L58$pb
 ; DARWIN-32-PIC-NEXT: L58$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L58$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	leal	262144(%edx,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L58$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L58$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EDX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L58$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], ([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4278,83 +4289,83 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: big02:
-; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    ptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big02:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big02:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144([[ECX]],[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EDX]]), [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big02:
 ; DARWIN-32-PIC: 	calll	L59$pb
 ; DARWIN-32-PIC-NEXT: L59$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L59$pb(%eax), %edx
-; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L59$pb([[EAX]]), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	262144([[EDX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L59$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big02:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big02:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big02:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4367,69 +4378,69 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: big03:
-; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ddst+262144(,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ddst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big03:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big03:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big03:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ddst+262144(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big03:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ddst+262144(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big03:
 ; DARWIN-32-PIC: 	calll	L60$pb
 ; DARWIN-32-PIC-NEXT: L60$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L60$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L60$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L60$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], (_ddst-L60$pb)+262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big03:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big03:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big03:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4440,66 +4451,66 @@ entry:
 	store i32* %0, i32** @dptr, align 8
 	ret void
 ; LINUX-64-STATIC: big04:
-; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big04:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big04:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big04:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big04:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _dptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big04:
 ; DARWIN-32-PIC: 	calll	L61$pb
 ; DARWIN-32-PIC-NEXT: L61$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L61$pb)+262144(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L61$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L61$pb)+262144([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _dptr-L61$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _dptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4513,76 +4524,76 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: big05:
-; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    dptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big05:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big05:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big05:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RCX]]), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big05:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big05:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big05:
 ; DARWIN-32-PIC: 	calll	L62$pb
 ; DARWIN-32-PIC-NEXT: L62$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L62$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L62$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L62$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L62$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big05:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big05:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big05:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4595,69 +4606,69 @@ entry:
 	store i32 %2, i32* %3, align 4
 	ret void
 ; LINUX-64-STATIC: big06:
-; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movl    %eax, ldst+262144(,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movl    [[EAX]], ldst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big06:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big06:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big06:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big06:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], _ldst+262144(,[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big06:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], _ldst+262144(,[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big06:
 ; DARWIN-32-PIC: 	calll	L63$pb
 ; DARWIN-32-PIC-NEXT: L63$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L63$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L63$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L63$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], (_ldst-L63$pb)+262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big06:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big06:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big06:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4668,65 +4679,65 @@ entry:
 	store i32* %0, i32** @lptr, align 8
 	ret void
 ; LINUX-64-STATIC: big07:
-; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
-; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), [[RAX:%r.x]]
+; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big07:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
-; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big07:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
-; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big07:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big07:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[EAX]], _lptr
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big07:
 ; DARWIN-32-PIC: 	calll	L64$pb
 ; DARWIN-32-PIC-NEXT: L64$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L64$pb)+262144(%eax,%ecx,4), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L64$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L64$pb)+262144([[EAX]],[[ECX]],4), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[ECX]], _lptr-L64$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
-; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	[[RAX]], _lptr(%rip)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4740,75 +4751,75 @@ entry:
 	store i32 %3, i32* %4, align 4
 	ret void
 ; LINUX-64-STATIC: big08:
-; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
-; LINUX-64-STATIC: movq    lptr(%rip), %rcx
-; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), [[EAX:%e.x]]
+; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: big08:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: big08:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
-; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
-; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: big08:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
-; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _big08:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _big08:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
-; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[EDX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big08:
 ; DARWIN-32-PIC: 	calll	L65$pb
 ; DARWIN-32-PIC-NEXT: L65$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L65$pb)+262144(%eax,%ecx,4), %edx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L65$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L65$pb)+262144([[EAX]],[[ECX]],4), [[EDX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L65$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	[[EDX]], 262144([[EAX]],[[ECX]],4)
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _big08:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _big08:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _big08:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
-; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
-; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), [[RCX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]],%rdi,4)
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -4842,8 +4853,8 @@ entry:
 ; DARWIN-32-PIC: _bar00:
 ; DARWIN-32-PIC: 	calll	L66$pb
 ; DARWIN-32-PIC-NEXT: L66$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L66$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar00:
@@ -4889,8 +4900,8 @@ entry:
 ; DARWIN-32-PIC: _bxr00:
 ; DARWIN-32-PIC: 	calll	L67$pb
 ; DARWIN-32-PIC-NEXT: L67$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L67$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bxr00:
@@ -4936,8 +4947,8 @@ entry:
 ; DARWIN-32-PIC: _bar01:
 ; DARWIN-32-PIC: 	calll	L68$pb
 ; DARWIN-32-PIC-NEXT: L68$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L68$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar01:
@@ -4983,8 +4994,8 @@ entry:
 ; DARWIN-32-PIC: _bxr01:
 ; DARWIN-32-PIC: 	calll	L69$pb
 ; DARWIN-32-PIC-NEXT: L69$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L69$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bxr01:
@@ -5030,8 +5041,8 @@ entry:
 ; DARWIN-32-PIC: _bar02:
 ; DARWIN-32-PIC: 	calll	L70$pb
 ; DARWIN-32-PIC-NEXT: L70$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L70$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar02:
@@ -5077,8 +5088,8 @@ entry:
 ; DARWIN-32-PIC: _bar03:
 ; DARWIN-32-PIC: 	calll	L71$pb
 ; DARWIN-32-PIC-NEXT: L71$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L71$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar03:
@@ -5124,8 +5135,8 @@ entry:
 ; DARWIN-32-PIC: _bar04:
 ; DARWIN-32-PIC: 	calll	L72$pb
 ; DARWIN-32-PIC-NEXT: L72$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ddst-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L72$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar04:
@@ -5171,8 +5182,8 @@ entry:
 ; DARWIN-32-PIC: _bar05:
 ; DARWIN-32-PIC: 	calll	L73$pb
 ; DARWIN-32-PIC-NEXT: L73$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_dptr-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_dptr-L73$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar05:
@@ -5218,8 +5229,8 @@ entry:
 ; DARWIN-32-PIC: _bar06:
 ; DARWIN-32-PIC: 	calll	L74$pb
 ; DARWIN-32-PIC-NEXT: L74$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L74$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar06:
@@ -5265,8 +5276,8 @@ entry:
 ; DARWIN-32-PIC: _bar07:
 ; DARWIN-32-PIC: 	calll	L75$pb
 ; DARWIN-32-PIC-NEXT: L75$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ldst-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L75$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar07:
@@ -5312,8 +5323,8 @@ entry:
 ; DARWIN-32-PIC: _bar08:
 ; DARWIN-32-PIC: 	calll	L76$pb
 ; DARWIN-32-PIC-NEXT: L76$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_lptr-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_lptr-L76$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bar08:
@@ -5359,8 +5370,8 @@ entry:
 ; DARWIN-32-PIC: _har00:
 ; DARWIN-32-PIC: 	calll	L77$pb
 ; DARWIN-32-PIC-NEXT: L77$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L77$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har00:
@@ -5406,8 +5417,8 @@ entry:
 ; DARWIN-32-PIC: _hxr00:
 ; DARWIN-32-PIC: 	calll	L78$pb
 ; DARWIN-32-PIC-NEXT: L78$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L78$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _hxr00:
@@ -5453,8 +5464,8 @@ entry:
 ; DARWIN-32-PIC: _har01:
 ; DARWIN-32-PIC: 	calll	L79$pb
 ; DARWIN-32-PIC-NEXT: L79$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L79$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har01:
@@ -5500,8 +5511,8 @@ entry:
 ; DARWIN-32-PIC: _hxr01:
 ; DARWIN-32-PIC: 	calll	L80$pb
 ; DARWIN-32-PIC-NEXT: L80$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L80$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _hxr01:
@@ -5535,8 +5546,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: har02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _har02:
@@ -5544,31 +5555,31 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _har02:
-; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har02:
 ; DARWIN-32-PIC: 	calll	L81$pb
 ; DARWIN-32-PIC-NEXT: L81$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L81$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RAX]]), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _har02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RAX]]), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _har02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -5602,8 +5613,8 @@ entry:
 ; DARWIN-32-PIC: _har03:
 ; DARWIN-32-PIC: 	calll	L82$pb
 ; DARWIN-32-PIC-NEXT: L82$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L82$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har03:
@@ -5649,8 +5660,8 @@ entry:
 ; DARWIN-32-PIC: _har04:
 ; DARWIN-32-PIC: 	calll	L83$pb
 ; DARWIN-32-PIC-NEXT: L83$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ddst-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L83$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har04:
@@ -5684,8 +5695,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: har05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _har05:
@@ -5699,8 +5710,8 @@ entry:
 ; DARWIN-32-PIC: _har05:
 ; DARWIN-32-PIC: 	calll	L84$pb
 ; DARWIN-32-PIC-NEXT: L84$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L84$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har05:
@@ -5746,8 +5757,8 @@ entry:
 ; DARWIN-32-PIC: _har06:
 ; DARWIN-32-PIC: 	calll	L85$pb
 ; DARWIN-32-PIC-NEXT: L85$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L85$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har06:
@@ -5793,8 +5804,8 @@ entry:
 ; DARWIN-32-PIC: _har07:
 ; DARWIN-32-PIC: 	calll	L86$pb
 ; DARWIN-32-PIC-NEXT: L86$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_ldst-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L86$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har07:
@@ -5842,8 +5853,8 @@ entry:
 ; DARWIN-32-PIC: _har08:
 ; DARWIN-32-PIC: 	calll	L87$pb
 ; DARWIN-32-PIC-NEXT: L87$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L87$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _har08:
@@ -5891,8 +5902,8 @@ entry:
 ; DARWIN-32-PIC: _bat00:
 ; DARWIN-32-PIC: 	calll	L88$pb
 ; DARWIN-32-PIC-NEXT: L88$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L88$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -5944,8 +5955,8 @@ entry:
 ; DARWIN-32-PIC: _bxt00:
 ; DARWIN-32-PIC: 	calll	L89$pb
 ; DARWIN-32-PIC-NEXT: L89$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L89$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -5997,8 +6008,8 @@ entry:
 ; DARWIN-32-PIC: _bat01:
 ; DARWIN-32-PIC: 	calll	L90$pb
 ; DARWIN-32-PIC-NEXT: L90$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L90$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -6050,8 +6061,8 @@ entry:
 ; DARWIN-32-PIC: _bxt01:
 ; DARWIN-32-PIC: 	calll	L91$pb
 ; DARWIN-32-PIC-NEXT: L91$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L91$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -6093,8 +6104,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: bat02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6104,35 +6115,35 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _bat02:
-; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat02:
 ; DARWIN-32-PIC: 	calll	L92$pb
 ; DARWIN-32-PIC-NEXT: L92$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L92$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bat02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RAX]]), %rax
 ; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _bat02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
 ; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _bat02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
@@ -6168,8 +6179,8 @@ entry:
 ; DARWIN-32-PIC: _bat03:
 ; DARWIN-32-PIC: 	calll	L93$pb
 ; DARWIN-32-PIC-NEXT: L93$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L93$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L93$pb)+64([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bat03:
@@ -6216,8 +6227,8 @@ entry:
 ; DARWIN-32-PIC: _bat04:
 ; DARWIN-32-PIC: 	calll	L94$pb
 ; DARWIN-32-PIC-NEXT: L94$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L94$pb)+64([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bat04:
@@ -6255,8 +6266,8 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: bat05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6273,8 +6284,8 @@ entry:
 ; DARWIN-32-PIC: _bat05:
 ; DARWIN-32-PIC: 	calll	L95$pb
 ; DARWIN-32-PIC-NEXT: L95$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L95$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L95$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -6324,8 +6335,8 @@ entry:
 ; DARWIN-32-PIC: _bat06:
 ; DARWIN-32-PIC: 	calll	L96$pb
 ; DARWIN-32-PIC-NEXT: L96$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L96$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L96$pb)+64([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bat06:
@@ -6371,8 +6382,8 @@ entry:
 ; DARWIN-32-PIC: _bat07:
 ; DARWIN-32-PIC: 	calll	L97$pb
 ; DARWIN-32-PIC-NEXT: L97$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L97$pb)+64([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bat07:
@@ -6427,8 +6438,8 @@ entry:
 ; DARWIN-32-PIC: _bat08:
 ; DARWIN-32-PIC: 	calll	L98$pb
 ; DARWIN-32-PIC-NEXT: L98$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L98$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L98$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -6480,9 +6491,9 @@ entry:
 ; DARWIN-32-PIC: _bam00:
 ; DARWIN-32-PIC: 	calll	L99$pb
 ; DARWIN-32-PIC-NEXT: L99$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	L_src$non_lazy_ptr-L99$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_src$non_lazy_ptr-L99$pb([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam00:
@@ -6533,9 +6544,9 @@ entry:
 ; DARWIN-32-PIC: _bam01:
 ; DARWIN-32-PIC: 	calll	L100$pb
 ; DARWIN-32-PIC-NEXT: L100$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L100$pb([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam01:
@@ -6586,9 +6597,9 @@ entry:
 ; DARWIN-32-PIC: _bxm01:
 ; DARWIN-32-PIC: 	calll	L101$pb
 ; DARWIN-32-PIC-NEXT: L101$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L101$pb([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bxm01:
@@ -6629,9 +6640,9 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: bam02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
-; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _bam02:
@@ -6640,36 +6651,36 @@ entry:
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _bam02:
-; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, [[ECX:%e.x]]
 ; DARWIN-32-DYNAMIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	([[ECX]]), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam02:
 ; DARWIN-32-PIC: 	calll	L102$pb
 ; DARWIN-32-PIC-NEXT: L102$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L102$pb([[EAX]]), [[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-STATIC-NEXT: 	movl	$262144, %eax
-; DARWIN-64-STATIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _bam02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-DYNAMIC-NEXT: 	movl	$262144, %eax
-; DARWIN-64-DYNAMIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _bam02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-64-PIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -6704,8 +6715,8 @@ entry:
 ; DARWIN-32-PIC: _bam03:
 ; DARWIN-32-PIC: 	calll	L103$pb
 ; DARWIN-32-PIC-NEXT: L103$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L103$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L103$pb)+262144([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam03:
@@ -6752,8 +6763,8 @@ entry:
 ; DARWIN-32-PIC: _bam04:
 ; DARWIN-32-PIC: 	calll	L104$pb
 ; DARWIN-32-PIC-NEXT: L104$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L104$pb)+262144([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam04:
@@ -6791,9 +6802,9 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: bam05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
-; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _bam05:
@@ -6809,9 +6820,9 @@ entry:
 ; DARWIN-32-PIC: _bam05:
 ; DARWIN-32-PIC: 	calll	L105$pb
 ; DARWIN-32-PIC-NEXT: L105$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	_dptr-L105$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	_dptr-L105$pb([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam05:
@@ -6860,8 +6871,8 @@ entry:
 ; DARWIN-32-PIC: _bam06:
 ; DARWIN-32-PIC: 	calll	L106$pb
 ; DARWIN-32-PIC-NEXT: L106$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L106$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L106$pb)+262144([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam06:
@@ -6908,7 +6919,7 @@ entry:
 ; DARWIN-32-PIC: 	calll	L107$pb
 ; DARWIN-32-PIC-NEXT: L107$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L107$pb)+262144([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam07:
@@ -6963,9 +6974,9 @@ entry:
 ; DARWIN-32-PIC: _bam08:
 ; DARWIN-32-PIC: 	calll	L108$pb
 ; DARWIN-32-PIC-NEXT: L108$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	popl	[[ECX:%e.x]]
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
-; DARWIN-32-PIC-NEXT: 	addl	_lptr-L108$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	addl	_lptr-L108$pb([[ECX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam08:
@@ -6995,53 +7006,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	src+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	src+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_src+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_src+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat00:
 ; DARWIN-32-PIC: 	calll	L109$pb
 ; DARWIN-32-PIC-NEXT: L109$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L109$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L109$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7056,53 +7067,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cxt00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cxt00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cxt00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cxt00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cxt00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxt00:
 ; DARWIN-32-PIC: 	calll	L110$pb
 ; DARWIN-32-PIC-NEXT: L110$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L110$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L110$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cxt00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cxt00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cxt00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7117,53 +7128,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat01:
 ; DARWIN-32-PIC: 	calll	L111$pb
 ; DARWIN-32-PIC-NEXT: L111$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L111$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L111$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7178,53 +7189,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cxt01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cxt01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cxt01:
-; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cxt01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cxt01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxt01:
 ; DARWIN-32-PIC: 	calll	L112$pb
 ; DARWIN-32-PIC-NEXT: L112$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L112$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L112$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cxt01:
-; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cxt01:
-; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cxt01:
-; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7236,67 +7247,67 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cat02:
-; LINUX-64-STATIC: movq    ptr(%rip), %rax
-; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    ptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat02:
-; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat02:
 ; DARWIN-32-PIC: 	calll	L113$pb
 ; DARWIN-32-PIC-NEXT: L113$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L113$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7311,51 +7322,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat03:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat03:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat03:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat03:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+64(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat03:
 ; DARWIN-32-PIC: 	calll	L114$pb
 ; DARWIN-32-PIC-NEXT: L114$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L114$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L114$pb)+64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat03:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat03:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat03:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7370,51 +7381,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat04:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat04:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat04:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat04:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat04:
 ; DARWIN-32-PIC: 	calll	L115$pb
 ; DARWIN-32-PIC-NEXT: L115$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L115$pb)+64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7426,62 +7437,62 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cat05:
-; LINUX-64-STATIC: movq    dptr(%rip), %rax
-; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    dptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat05:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat05:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat05:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat05:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat05:
 ; DARWIN-32-PIC: 	calll	L116$pb
 ; DARWIN-32-PIC-NEXT: L116$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L116$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L116$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat05:
-; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat05:
-; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat05:
-; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7496,51 +7507,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat06:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat06:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	lsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat06:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat06:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat06:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+64(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat06:
 ; DARWIN-32-PIC: 	calll	L117$pb
 ; DARWIN-32-PIC-NEXT: L117$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L117$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L117$pb)+64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat06:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat06:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat06:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7555,51 +7566,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat07:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat07:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat07:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat07:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat07:
 ; DARWIN-32-PIC: 	calll	L118$pb
 ; DARWIN-32-PIC-NEXT: L118$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L118$pb)+64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7611,61 +7622,61 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cat08:
-; LINUX-64-STATIC: movq    lptr(%rip), %rax
-; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    lptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cat08:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cat08:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cat08:
-; LINUX-64-PIC: 	movq	lptr(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	lptr(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cat08:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cat08:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat08:
 ; DARWIN-32-PIC: 	calll	L119$pb
 ; DARWIN-32-PIC-NEXT: L119$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L119$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L119$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	64([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cat08:
-; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cat08:
-; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cat08:
-; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7680,53 +7691,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	src+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	src+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam00:
-; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_src+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_src+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam00:
 ; DARWIN-32-PIC: 	calll	L120$pb
 ; DARWIN-32-PIC-NEXT: L120$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L120$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L120$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam00:
-; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam00:
-; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam00:
-; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7741,53 +7752,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cxm00:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cxm00:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cxm00:
-; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cxm00:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cxm00:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxm00:
 ; DARWIN-32-PIC: 	calll	L121$pb
 ; DARWIN-32-PIC-NEXT: L121$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L121$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L121$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cxm00:
-; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cxm00:
-; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cxm00:
-; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7802,53 +7813,53 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam01:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam01:
-; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam01:
 ; DARWIN-32-PIC: 	calll	L122$pb
 ; DARWIN-32-PIC-NEXT: L122$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L122$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L122$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam01:
-; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam01:
-; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam01:
-; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7864,52 +7875,52 @@ entry:
 
 ; LINUX-32-STATIC: cxm01:
 ; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cxm01:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	xdst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cxm01:
-; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cxm01:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_xdst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cxm01:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxm01:
 ; DARWIN-32-PIC: 	calll	L123$pb
 ; DARWIN-32-PIC-NEXT: L123$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L123$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L123$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cxm01:
-; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cxm01:
-; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cxm01:
-; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7921,67 +7932,67 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cam02:
-; LINUX-64-STATIC: movq    ptr(%rip), %rax
-; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    ptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam02:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam02:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam02:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam02:
-; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam02:
 ; DARWIN-32-PIC: 	calll	L124$pb
 ; DARWIN-32-PIC-NEXT: L124$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L124$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -7996,51 +8007,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam03:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam03:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	dsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam03:
-; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam03:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam03:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+262144(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam03:
 ; DARWIN-32-PIC: 	calll	L125$pb
 ; DARWIN-32-PIC-NEXT: L125$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L125$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L125$pb)+262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam03:
-; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam03:
-; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam03:
-; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8055,51 +8066,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam04:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam04:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam04:
-; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam04:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam04:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam04:
 ; DARWIN-32-PIC: 	calll	L126$pb
 ; DARWIN-32-PIC-NEXT: L126$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L126$pb)+262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam04:
-; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam04:
-; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam04:
-; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8111,62 +8122,62 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cam05:
-; LINUX-64-STATIC: movq    dptr(%rip), %rax
-; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    dptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam05:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam05:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam05:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam05:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam05:
 ; DARWIN-32-PIC: 	calll	L127$pb
 ; DARWIN-32-PIC-NEXT: L127$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_dptr-L127$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L127$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam05:
-; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam05:
-; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam05:
-; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_dptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8181,51 +8192,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam06:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam06:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	lsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam06:
-; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam06:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam06:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+262144(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam06:
 ; DARWIN-32-PIC: 	calll	L128$pb
 ; DARWIN-32-PIC-NEXT: L128$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L128$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L128$pb)+262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam06:
-; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam06:
-; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam06:
-; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8240,51 +8251,51 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam07:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam07:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam07:
-; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam07:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam07:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam07:
 ; DARWIN-32-PIC: 	calll	L129$pb
 ; DARWIN-32-PIC-NEXT: L129$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L129$pb)+262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam07:
-; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam07:
-; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam07:
-; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8296,61 +8307,61 @@ entry:
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
 ; LINUX-64-STATIC: cam08:
-; LINUX-64-STATIC: movq    lptr(%rip), %rax
-; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: movq    lptr(%rip), [[RAX:%r.x]]
+; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: cam08:
-; LINUX-32-STATIC: 	movl	4(%esp), %eax
-; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: cam08:
-; LINUX-32-PIC: 	movl	4(%esp), %eax
-; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
-; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
+; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
+; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: cam08:
-; LINUX-64-PIC: 	movq	lptr(%rip), %rax
-; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC: 	movq	lptr(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _cam08:
-; DARWIN-32-STATIC: 	movl	4(%esp), %eax
-; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _cam08:
-; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
-; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
-; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), [[EAX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, [[ECX:%e.x]]
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam08:
 ; DARWIN-32-PIC: 	calll	L130$pb
 ; DARWIN-32-PIC-NEXT: L130$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
-; DARWIN-32-PIC-NEXT: 	movl	_lptr-L130$pb(%eax), %eax
-; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), [[ECX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L130$pb([[EAX]]), [[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	262144([[EAX]],[[ECX]],4), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _cam08:
-; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _cam08:
-; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _cam08:
-; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
-; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC: 	movq	_lptr(%rip), [[RAX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8648,8 +8659,8 @@ entry:
 ; DARWIN-32-PIC: _address:
 ; DARWIN-32-PIC: 	calll	L133$pb
 ; DARWIN-32-PIC-NEXT: L133$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_callee$non_lazy_ptr-L133$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_callee$non_lazy_ptr-L133$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _address:
@@ -8697,8 +8708,8 @@ entry:
 ; DARWIN-32-PIC: _laddress:
 ; DARWIN-32-PIC: 	calll	L134$pb
 ; DARWIN-32-PIC-NEXT: L134$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_lcallee-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_lcallee-L134$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _laddress:
@@ -8744,8 +8755,8 @@ entry:
 ; DARWIN-32-PIC: _daddress:
 ; DARWIN-32-PIC: 	calll	L135$pb
 ; DARWIN-32-PIC-NEXT: L135$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	leal	_dcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	leal	_dcallee-L135$pb([[EAX]]), %eax
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _daddress:
@@ -9206,11 +9217,11 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: icaller:
-; LINUX-64-PIC: 	pushq	%rbx
-; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	popq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _icaller:
@@ -9235,8 +9246,8 @@ entry:
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	calll	L142$pb
 ; DARWIN-32-PIC-NEXT: L142$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L142$pb([[EAX]]), %esi
 ; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
@@ -9244,27 +9255,27 @@ entry:
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _icaller:
-; DARWIN-64-STATIC: 	pushq	%rbx
-; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-STATIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-STATIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _icaller:
-; DARWIN-64-DYNAMIC: 	pushq	%rbx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-DYNAMIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _icaller:
-; DARWIN-64-PIC: 	pushq	%rbx
-; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-PIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-PIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9296,11 +9307,11 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: dicaller:
-; LINUX-64-PIC: 	pushq	%rbx
-; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rbx
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	popq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _dicaller:
@@ -9461,11 +9472,11 @@ entry:
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: itailcaller:
-; LINUX-64-PIC: 	pushq	%rbx
-; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
-; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
+; LINUX-64-PIC-NEXT: 	popq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _itailcaller:
@@ -9490,8 +9501,8 @@ entry:
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	calll	L145$pb
 ; DARWIN-32-PIC-NEXT: L145$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L145$pb([[EAX]]), %esi
 ; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
@@ -9499,27 +9510,27 @@ entry:
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _itailcaller:
-; DARWIN-64-STATIC: 	pushq	%rbx
-; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-STATIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-STATIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-STATIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _itailcaller:
-; DARWIN-64-DYNAMIC: 	pushq	%rbx
-; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-DYNAMIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _itailcaller:
-; DARWIN-64-PIC: 	pushq	%rbx
-; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
-; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
-; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC: 	pushq	[[RBX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
+; DARWIN-64-PIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-PIC-NEXT: 	callq	*([[RBX]])
+; DARWIN-64-PIC-NEXT: 	popq	[[RBX:%r.x]]
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9547,8 +9558,8 @@ entry:
 
 ; LINUX-64-PIC: ditailcaller:
 ; LINUX-64-PIC: 	pushq
-; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rax
-; LINUX-64-PIC-NEXT: 	callq	*(%rax)
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), [[RAX:%r.x]]
+; LINUX-64-PIC-NEXT: 	callq	*([[RAX]])
 ; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -9568,8 +9579,8 @@ entry:
 ; DARWIN-32-PIC: 	subl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	calll	L146$pb
 ; DARWIN-32-PIC-NEXT: L146$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	calll	*_difunc-L146$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	calll	*_difunc-L146$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
@@ -9635,8 +9646,8 @@ entry:
 ; DARWIN-32-PIC: 	subl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	calll	L147$pb
 ; DARWIN-32-PIC-NEXT: L147$pb:
-; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	calll	*_lifunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	popl	[[EAX:%e.x]]
+; DARWIN-32-PIC-NEXT: 	calll	*_lifunc-L147$pb([[EAX]])
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 62c8980..b95e5b5 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -133,3 +133,18 @@ define i32 @test9(i32 %x, i32 %y) nounwind readnone {
 ; X64: subl
 ; X64: ret
 }
+
+define i1 @test10(i32 %x) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 1)
+  %obit = extractvalue {i32, i1} %t, 1
+  ret i1 %obit
+
+; X32: test10:
+; X32: incl
+; X32-NEXT: seto
+
+; X64: test10:
+; X64: incl
+; X64-NEXT: seto
+}
diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll
new file mode 100644
index 0000000..98c4f99
--- /dev/null
+++ b/test/CodeGen/X86/adde-carry.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64
+; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32
+
+define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+ %0 = zext i64 %a to i128
+ %1 = zext i64 %b to i128
+ %2 = add i128 %1, %0
+ %3 = zext i64 %c to i128
+ %4 = shl i128 %3, 64
+ %5 = add i128 %4, %2
+ %6 = lshr i128 %5, 64
+ %7 = trunc i128 %6 to i64
+ store i64 %7, i64* %s, align 8
+ %8 = trunc i128 %2 to i64
+ store i64 %8, i64* %t, align 8
+ ret void
+
+; CHECK-32: addl
+; CHECK-32: adcl
+; CHECK-32: adcl $0
+; CHECK-32: adcl $0
+
+; CHECK-64: addq
+; CHECK-64: adcq $0
+}
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 3ed3bd6..f920279 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,6 +1,4 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
-; RUN: grep { = } %t   | count 16
-; RUN: grep set %t   | count 18
 ; RUN: grep globl %t | count 6
 ; RUN: grep weak %t  | count 1
 ; RUN: grep hidden %t | count 1
diff --git a/test/CodeGen/X86/alignment.ll b/test/CodeGen/X86/alignment.ll
index 9678e6d..7e91115 100644
--- a/test/CodeGen/X86/alignment.ll
+++ b/test/CodeGen/X86/alignment.ll
@@ -6,7 +6,7 @@
 
 ; CHECK:	.bss
 ; CHECK:	.globl	GlobalA
-; CHECK:	.align	16
+; CHECK:	.align	8
 ; CHECK: GlobalA:
 ; CHECK:	.zero	384
 
@@ -15,12 +15,12 @@
 ; PR6921
 @GlobalB = common global { [384 x i8] } zeroinitializer, align 8
 
-; CHECK: 	.comm	GlobalB,384,16
+; CHECK: 	.comm	GlobalB,384,8
 
 
 @GlobalC = common global { [384 x i8] } zeroinitializer, align 2
 
-; CHECK: 	.comm	GlobalC,384,16
+; CHECK: 	.comm	GlobalC,384,2
 
 
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index d0c64f2..b514cf6 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,10 +1,16 @@
-; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
 ; CHECK: leaq    (%rdi), %rax
 ; CHECK-NEXT: movl    %esi, %ecx
 ; CHECK-NEXT: monitor
+; WIN64: foo:
+; WIN64:      leaq    (%rcx), %rax
+; WIN64-NEXT: movl    %edx, %ecx
+; WIN64-NEXT: movl    %r8d, %edx
+; WIN64-NEXT: monitor
 define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
 entry:
   tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
@@ -17,6 +23,9 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
 ; CHECK: movl    %edi, %ecx
 ; CHECK-NEXT: movl    %esi, %eax
 ; CHECK-NEXT: mwait
+; WIN64: bar:
+; WIN64:      movl    %edx, %eax
+; WIN64-NEXT: mwait
 define void @bar(i32 %E, i32 %H) nounwind {
 entry:
   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
index 8003de2..cee2ee4 100644
--- a/test/CodeGen/X86/avoid-lea-scale2.ll
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: leal -2({{%rdi,%rdi|%rcx,%rcx}})
 
 define i32 @foo(i32 %x) nounwind readnone {
   %t0 = shl i32 %x, 1
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 6c32396..5201688 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -247,7 +247,7 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind
 
 define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
   ; CHECK: movl
-  ; CHECK: vmovdqu
+  ; CHECK: vmovups
   %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -256,7 +256,7 @@ declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
 
 define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
   ; CHECK: movl
-  ; CHECK: vmovupd
+  ; CHECK: vmovups
   %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll
new file mode 100644
index 0000000..d2c30c6
--- /dev/null
+++ b/test/CodeGen/X86/bool-zext.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: @bar1
+; CHECK: movzbl
+; CHECK: callq
+define void @bar1(i1 zeroext %v1) nounwind ssp {
+entry:
+  %conv = zext i1 %v1 to i32
+  %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+  ret void
+}
+
+; CHECK: @bar2
+; CHECK-NOT: movzbl
+; CHECK: callq
+define void @bar2(i8 zeroext %v1) nounwind ssp {
+entry:
+  %conv = zext i8 %v1 to i32
+  %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+  ret void
+}
+
+; CHECK: @bar3
+; CHECK: callq
+; CHECK-NOT: movzbl
+; CHECK-NOT: and
+; CHECK: ret
+define zeroext i1 @bar3() nounwind ssp {
+entry:
+  %call = call i1 @foo2() nounwind
+  ret i1 %call
+}
+
+declare i32 @foo1(...)
+declare zeroext i1 @foo2()
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 972b3cd..93b2043 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=none > %t
+; Without list-burr scheduling we may not see the difference in codegen here.
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
 ; RUN:   grep {%xmm0} %t | count 14
 ; RUN:   not grep {%xmm1} %t
 ; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index ac0bc09..185eda1 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck -check-prefix=X86-64 %s
+; Win64 has not supported byval yet.
 ; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
 
 ; X86: movl	4(%esp), %eax
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 71129f5..03a9f0f 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,5 +1,28 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86    | grep rep.movsl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; Win64 has not supported byval yet.
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 504e0be..8d5bb6d 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,5 +1,28 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; Win64 has not supported byval yet.
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
                    i32, i32, i32, i32, i32, i32, i32, i32,
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 4db9d65..ae1a79a 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,5 +1,28 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; Win64 has not supported byval yet.
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
                    i16, i16, i16, i16, i16, i16, i16, i16,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index 69c115b..a376709 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,5 +1,28 @@
-; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
-; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     movsq
+; X64:     rep
+; X64-NOT:     rep
+; X64:     movsq
+; X64-NOT:     rep
+; X64-NOT:     movsq
+
+; Win64 has not supported byval yet.
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     movsl
+; X32:     rep
+; X32-NOT:     rep
+; X32:     movsl
+; X32-NOT:     rep
+; X32-NOT:     movsl
 
 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index 02cbccc..8cca10c 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -27,3 +27,19 @@ UnifiedReturnBlock:             ; preds = %entry
 }
 
 declare i32 @f(%struct.decode_t*)
+
+
+; There should be no store for the undef operand.
+
+; CHECK: _test2:
+; CHECK-NOT: 8(%esp)
+; CHECK: 4(%esp)
+; CHECK-NOT: 8(%esp)
+; CHECK: calll 
+declare i32 @foo(i32, i32, i32)
+
+define void @test2() nounwind {
+entry:
+  %call = call i32 @foo(i32 8, i32 6, i32 undef)
+  ret void
+}
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
index e0f2796..a584876 100644
--- a/test/CodeGen/X86/coalesce-esp.ll
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl	%esp, %ecx}
+; RUN: llc < %s | grep {movl	%esp, %ebp}
 ; PR4572
 
 ; Don't coalesce with %esp if it would end up putting %esp in
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 5d10bba..7306920 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,5 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep paddw | count 2
-; RUN: llc < %s -march=x86-64 | not grep mov
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     mov
+; CHECK:     paddw
+; CHECK-NOT:     mov
+; CHECK:     paddw
+; CHECK-NOT:     paddw
+; CHECK-NOT:     mov
 
 ; The 2-addr pass should ensure that identical code is produced for these functions
 ; no extra copy should be generated.
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
index 7d6f399..976db64 100644
--- a/test/CodeGen/X86/coalescer-cross.ll
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -regalloc=basic | FileCheck %s
 ; rdar://6509240
 
+; CHECK: os_clock
+; CHECK-NOT: movaps
+
 	type { %struct.TValue }		; type %0
 	type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1
 	%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 89b436e..ef44a3d 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -38,11 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind {
 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
 entry:
 ; DARWIN: t3:
+; DARWIN: shll $16
 ; DARWIN: shlq $32, %rcx
 ; DARWIN-NOT: leaq
 ; DARWIN: orq %rcx, %rax
-; DARWIN-NOT: mov
-; DARWIN: shll $16
   %tmp21 = zext i32 %lb to i64
   %tmp23 = zext i32 %ub to i64
   %tmp24 = shl i64 %tmp23, 32
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 2a44463..4be14d2 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,16 @@
-; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
-; RUN: llc < %s -march=x86-64 -o /dev/null -stats  -info-output-file - | grep asm-printer | grep 6
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats  -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -mtriple=x86_64-linux   | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK:     LCPI
+; CHECK:     LCPI
+; CHECK:     LCPI
+; CHECK-NOT: LCPI
+
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X64stat
+; X64stat: 6 asm-printer
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X32stat
+; X32stat: 12 asm-printer
 
 declare float @qux(float %y)
 
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 17cb2b3..b82348b 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
-; RUN:   grep {asm-printer} | grep {Number of machine instrs printed} | grep 9
-; RUN: grep {leal	1(\%rsi),} %t
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; STATS: 9 asm-printer
+
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: leal 1({{%rsi|%rdx}}),
 
 define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize {
 entry:
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 2d8e63e..7c4e64c 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -189,7 +189,7 @@ for.inc44:                                        ; preds = %for.body
 }
 
 ; PR9028
-define void @f(i64 %A) nounwind {
+define void @func_60(i64 %A) nounwind {
 entry:
   %0 = zext i64 %A to i160
   %1 = shl i160 %0, 64
@@ -199,3 +199,19 @@ entry:
   store i576 %4, i576* undef, align 8
   ret void
 }
+
+; <rdar://problem/9187792>
+define fastcc void @func_61() nounwind sspreq {
+entry:
+  %t1 = tail call i64 @llvm.objectsize.i64(i8* undef, i1 false)
+  %t2 = icmp eq i64 %t1, -1
+  br i1 %t2, label %bb2, label %bb1
+
+bb1:
+  ret void
+
+bb2:
+  ret void
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll
new file mode 100644
index 0000000..367c1ef
--- /dev/null
+++ b/test/CodeGen/X86/dbg-declare-arg.ll
@@ -0,0 +1,123 @@
+; RUN: llc -O0 -fast-isel=false < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+;Radar 9321650
+
+;CHECK: ##DEBUG_VALUE: my_a 
+
+%class.A = type { i32, i32, i32, i32 }
+
+define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp {
+entry:
+  %i.addr = alloca i32, align 4
+  %j = alloca i32, align 4
+  %nrvo = alloca i1
+  %cleanup.dest.slot = alloca i32
+  store i32 %i, i32* %i.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !26), !dbg !27
+  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !28), !dbg !30
+  store i32 0, i32* %j, align 4, !dbg !31
+  %tmp = load i32* %i.addr, align 4, !dbg !32
+  %cmp = icmp eq i32 %tmp, 42, !dbg !32
+  br i1 %cmp, label %if.then, label %if.end, !dbg !32
+
+if.then:                                          ; preds = %entry
+  %tmp1 = load i32* %i.addr, align 4, !dbg !33
+  %add = add nsw i32 %tmp1, 1, !dbg !33
+  store i32 %add, i32* %j, align 4, !dbg !33
+  br label %if.end, !dbg !35
+
+if.end:                                           ; preds = %if.then, %entry
+  store i1 false, i1* %nrvo, !dbg !36
+  call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !37), !dbg !39
+  %tmp2 = load i32* %j, align 4, !dbg !40
+  %x = getelementptr inbounds %class.A* %agg.result, i32 0, i32 0, !dbg !40
+  store i32 %tmp2, i32* %x, align 4, !dbg !40
+  store i1 true, i1* %nrvo, !dbg !41
+  store i32 1, i32* %cleanup.dest.slot
+  %nrvo.val = load i1* %nrvo, !dbg !42
+  br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !42
+
+nrvo.unused:                                      ; preds = %if.end
+  call void @_ZN1AD1Ev(%class.A* %agg.result), !dbg !42
+  br label %nrvo.skipdtor, !dbg !42
+
+nrvo.skipdtor:                                    ; preds = %nrvo.unused, %if.end
+  ret void, !dbg !42
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 {
+entry:
+  %this.addr = alloca %class.A*, align 8
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !43), !dbg !44
+  %this1 = load %class.A** %this.addr
+  call void @_ZN1AD2Ev(%class.A* %this1)
+  ret void, !dbg !45
+}
+
+define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 {
+entry:
+  %this.addr = alloca %class.A*, align 8
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !46), !dbg !47
+  %this1 = load %class.A** %this.addr
+  %x = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !48
+  store i32 1, i32* %x, align 4, !dbg !48
+  ret void, !dbg !48
+}
+
+!llvm.dbg.sp = !{!0, !10, !14, !19, !22, !25}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
+!2 = metadata !{i32 589841, i32 0, i32 4, metadata !"a.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130127)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589865, metadata !"a.cc", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
+!5 = metadata !{i32 589837, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 589837, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
+!8 = metadata !{i32 589837, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!9 = metadata !{i32 589837, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{null, metadata !13}
+!13 = metadata !{i32 589839, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{null, metadata !13, metadata !17}
+!17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
+!18 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
+!19 = metadata !{i32 589870, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!21 = metadata !{metadata !1}
+!22 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!24 = metadata !{null}
+!25 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
+!26 = metadata !{i32 590081, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 4, i32 11, metadata !19, null}
+!28 = metadata !{i32 590080, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 589835, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 5, i32 7, metadata !29, null}
+!31 = metadata !{i32 5, i32 12, metadata !29, null}
+!32 = metadata !{i32 6, i32 3, metadata !29, null}
+!33 = metadata !{i32 7, i32 5, metadata !34, null}
+!34 = metadata !{i32 589835, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
+!35 = metadata !{i32 8, i32 3, metadata !34, null}
+!36 = metadata !{i32 9, i32 9, metadata !29, null}
+!37 = metadata !{i32 590080, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
+!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!39 = metadata !{i32 9, i32 5, metadata !29, null}
+!40 = metadata !{i32 10, i32 3, metadata !29, null}
+!41 = metadata !{i32 11, i32 3, metadata !29, null}
+!42 = metadata !{i32 12, i32 1, metadata !29, null}
+!43 = metadata !{i32 590081, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ]
+!44 = metadata !{i32 2, i32 47, metadata !22, null}
+!45 = metadata !{i32 2, i32 61, metadata !22, null}
+!46 = metadata !{i32 590081, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ]
+!47 = metadata !{i32 2, i32 47, metadata !25, null}
+!48 = metadata !{i32 2, i32 54, metadata !49, null}
+!49 = metadata !{i32 589835, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
new file mode 100644
index 0000000..e7d5f92
--- /dev/null
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
+
+; Radar 8884898
+; CHECK: file	1 "/Users/manav/one/two/simple.c"
+
+declare i32 @printf(i8*, ...) nounwind
+
+define i32 @main() nounwind {
+  ret i32 0
+}
+
+!llvm.dbg.sp = !{ !6}
+
+!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !5}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index 83df147..76b93dd 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -1,10 +1,11 @@
 ; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=basic | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin8"
 
 ;CHECK: Ldebug_loc0:
 ;CHECK-NEXT:	.quad	Lfunc_begin0
-;CHECK-NEXT:	.quad	Lfunc_end0
+;CHECK-NEXT:	.quad	L
 ;CHECK-NEXT:	.short	1                       ## Loc expr size
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
 ;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
index 89bbf34..481c4ba 100644
--- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
 
 ;CHECK: DW_TAG_inlined_subroutine
 ;CHECK-NEXT: DW_AT_abstract_origin
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index 87d7e91..a0e4d16 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=basic | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 ;Radar 8950491
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 2985224..da49f2d 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic < %s | FileCheck %s
 
 %struct.a = type { i32 }
 
@@ -41,15 +42,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !18 = metadata !{i32 7, i32 2, metadata !12, null}
 !19 = metadata !{i32 8, i32 2, metadata !12, null}
 
-; check that variable bar:b value range is appropriately trucated in debug info. Here Ltmp5 is end of
-; location range.
+; Check that variable bar:b value range is appropriately trucated in debug info.
+; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'
+; Here Ltmp7 is the end of the location range.
 
-;CHECK:Ltmp6
-;CHECK-NEXT: DEBUG_VALUE: bar:b <- undef
+;CHECK: .loc	1 7 2
+;CHECK: movl
+;CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]]
 
 ;CHECK:Ldebug_loc0:
-;CHECK-NEXT:	.quad	Ltmp
-;CHECK-NEXT:	.quad	Ltmp6
+;CHECK-NEXT:	.quad
+;CHECK-NEXT:	.quad	[[CLOBBER]]
 ;CHECK-NEXT:	.short	1
 ;CHECK-NEXT:	.byte	85
 ;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index fe335b9..08e3272 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -60,3 +60,14 @@ entry:
 ; CHECK: shrl	$31, %ecx
 ; CHECK: sarl	$18, %eax
 }
+
+define i32 @test7(i32 %x) nounwind {
+  %div = udiv i32 %x, 28
+  ret i32 %div
+; CHECK: test7:
+; CHECK: shrl $2
+; CHECK: movl $613566757
+; CHECK: mull
+; CHECK-NOT: shrl
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index e577611..7b0fe18 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7}
-; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16}
-; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16}
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefix=X32
+; X32-NOT:     {{$429496728|-7}}
+; X32:     {{$4294967280|-16}}
+; X32-NOT:     {{$429496728|-7}}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64:     -16
 
 define void @t() nounwind {
 A:
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index fbe0243..48abfd0 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -87,4 +87,23 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
 ; X64-NEXT: ret
 }
 
+; PR9500, rdar://9156159 - Don't do non-local address mode folding,
+; because it may require values which wouldn't otherwise be live out
+; of their blocks.
+define void @test6() {
+if.end:                                           ; preds = %if.then, %invoke.cont
+  %tmp15 = load i64* undef
+  %dec = add i64 %tmp15, 13
+  store i64 %dec, i64* undef
+  %call17 = invoke i8* @_ZNK18G__FastAllocString4dataEv()
+          to label %invoke.cont16 unwind label %lpad
 
+invoke.cont16:                                    ; preds = %if.then14
+  %arrayidx18 = getelementptr inbounds i8* %call17, i64 %dec
+  store i8 0, i8* %arrayidx18
+  unreachable
+
+lpad:                                             ; preds = %if.end19, %if.then14, %if.end, %entry
+  unreachable
+}
+declare i8* @_ZNK18G__FastAllocString4dataEv() nounwind
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index d066578..5d572c1 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,19 +1,40 @@
-; RUN: llc < %s -march=x86 -fast-isel | grep {andb	\$1, %}
+; RUN: llc < %s -march=x86 -fast-isel | FileCheck %s
 
-declare i64 @bar(i64)
+declare i64 @test1a(i64)
 
-define i32 @foo(i64 %x) nounwind {
-	%y = add i64 %x, -3		; <i64> [#uses=1]
-	%t = call i64 @bar(i64 %y)		; <i64> [#uses=1]
-	%s = mul i64 %t, 77		; <i64> [#uses=1]
-	%z = trunc i64 %s to i1		; <i1> [#uses=1]
+define i32 @test1(i64 %x) nounwind {
+; CHECK: test1:
+; CHECK: andb $1, %
+	%y = add i64 %x, -3
+	%t = call i64 @test1a(i64 %y)
+	%s = mul i64 %t, 77
+	%z = trunc i64 %s to i1
 	br label %next
 
 next:		; preds = %0
-	%u = zext i1 %z to i32		; <i32> [#uses=1]
-	%v = add i32 %u, 1999		; <i32> [#uses=1]
+	%u = zext i1 %z to i32
+	%v = add i32 %u, 1999
 	br label %exit
 
 exit:		; preds = %next
 	ret i32 %v
 }
+
+define void @test2(i8* %a) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: movb {{.*}} %al
+; CHECK-NEXT: xorb $1, %al
+; CHECK-NEXT: testb $1
+  %tmp = load i8* %a, align 1
+  %tobool = trunc i8 %tmp to i1
+  %tobool2 = xor i1 %tobool, true
+  br i1 %tobool2, label %if.then, label %if.end
+
+if.then:
+  call void @test2(i8* null)
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
deleted file mode 100644
index 5c62c18..0000000
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=x86 -O0 | grep {sarl	\$80, %e}
-; PR3242
-
-define void @foo(i32 %x, i32* %p) nounwind {
-  %y = ashr i32 %x, 50000
-  store i32 %y, i32* %p
-  ret void
-}
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
new file mode 100644
index 0000000..c4afc10
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -0,0 +1,262 @@
+; RUN: llc < %s  -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Make sure that fast-isel folds the immediate into the binop even though it
+; is non-canonical.
+define i32 @test1(i32 %i) nounwind ssp {
+  %and = and i32 8, %i
+  ret i32 %and
+}
+
+; CHECK: test1:
+; CHECK: andl	$8, 
+
+
+; rdar://9289512 - The load should fold into the compare.
+define void @test2(i64 %x) nounwind ssp {
+entry:
+  %x.addr = alloca i64, align 8
+  store i64 %x, i64* %x.addr, align 8
+  %tmp = load i64* %x.addr, align 8
+  %cmp = icmp sgt i64 %tmp, 42
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+; CHECK: test2:
+; CHECK: movq	%rdi, -8(%rsp)
+; CHECK: cmpq	$42, -8(%rsp)
+}
+
+
+
+
+@G = external global i32
+define i64 @test3() nounwind {
+  %A = ptrtoint i32* @G to i64
+  ret i64 %A
+; CHECK: test3:
+; CHECK: movq _G@GOTPCREL(%rip), %rax
+; CHECK-NEXT: ret
+}
+
+
+
+; rdar://9289558
+@rtx_length = external global [153 x i8]
+
+define i32 @test4(i64 %idxprom9) nounwind {
+  %arrayidx10 = getelementptr inbounds [153 x i8]* @rtx_length, i32 0, i64 %idxprom9
+  %tmp11 = load i8* %arrayidx10, align 1
+  %conv = zext i8 %tmp11 to i32
+  ret i32 %conv
+
+; CHECK: test4:
+; CHECK: movq	_rtx_length@GOTPCREL(%rip), %rax
+; CHECK-NEXT: movzbl	(%rax,%rdi), %eax
+; CHECK-NEXT: ret
+}
+
+
+; PR3242 - Out of range shifts should not be folded by fastisel.
+define void @test5(i32 %x, i32* %p) nounwind {
+  %y = ashr i32 %x, 50000
+  store i32 %y, i32* %p
+  ret void
+
+; CHECK: test5:
+; CHECK: movl	$50000, %ecx
+; CHECK: sarl	%cl, %edi
+; CHECK: ret
+}
+
+; rdar://9289501 - fast isel should fold trivial multiplies to shifts.
+define i64 @test6(i64 %x) nounwind ssp {
+entry:
+  %mul = mul nsw i64 %x, 8
+  ret i64 %mul
+
+; CHECK: test6:
+; CHECK: leaq	(,%rdi,8), %rax
+}
+
+define i32 @test7(i32 %x) nounwind ssp {
+entry:
+  %mul = mul nsw i32 %x, 8
+  ret i32 %mul
+; CHECK: test7:
+; CHECK: leal	(,%rdi,8), %eax
+}
+
+
+; rdar://9289507 - folding of immediates into 64-bit operations.
+define i64 @test8(i64 %x) nounwind ssp {
+entry:
+  %add = add nsw i64 %x, 7
+  ret i64 %add
+
+; CHECK: test8:
+; CHECK: addq	$7, %rdi
+}
+
+define i64 @test9(i64 %x) nounwind ssp {
+entry:
+  %add = mul nsw i64 %x, 7
+  ret i64 %add
+; CHECK: test9:
+; CHECK: imulq	$7, %rdi, %rax
+}
+
+; rdar://9297011 - Don't reject udiv by a power of 2.
+define i32 @test10(i32 %X) nounwind {
+  %Y = udiv i32 %X, 8
+  ret i32 %Y
+; CHECK: test10:
+; CHECK: shrl	$3, 
+}
+
+define i32 @test11(i32 %X) nounwind {
+  %Y = sdiv exact i32 %X, 8
+  ret i32 %Y
+; CHECK: test11:
+; CHECK: sarl	$3, 
+}
+
+
+; rdar://9297006 - Trunc to bool.
+define void @test12(i8 %tmp) nounwind ssp noredzone {
+entry:
+  %tobool = trunc i8 %tmp to i1
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @test12(i8 0) noredzone
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+; CHECK: test12:
+; CHECK: testb	$1,
+; CHECK-NEXT: je L
+; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: callq
+}
+
+declare void @test13f(i1 %X)
+
+define void @test13() nounwind {
+  call void @test13f(i1 0)
+  ret void
+; CHECK: test13:
+; CHECK: movl $0, %edi
+; CHECK-NEXT: callq
+}
+
+
+
+; rdar://9297003 - fast isel bails out on all functions taking bools
+define void @test14(i8 %tmp) nounwind ssp noredzone {
+entry:
+  %tobool = trunc i8 %tmp to i1
+  call void @test13f(i1 zeroext %tobool) noredzone
+  ret void
+; CHECK: test14:
+; CHECK: andb	$1, 
+; CHECK: callq
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
+define void @test15(i8* %a, i8* %b) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
+  ret void
+; CHECK: test15:
+; CHECK-NEXT: movl	(%rsi), %eax
+; CHECK-NEXT: movl	%eax, (%rdi)
+; CHECK-NEXT: ret
+}
+
+; Handling for varargs calls
+declare void @test16callee(...) nounwind
+define void @test16() nounwind {
+; CHECK: test16:
+; CHECK: movl $1, %edi
+; CHECK: movb $0, %al
+; CHECK: callq _test16callee
+  call void (...)* @test16callee(i32 1)
+  br label %block2
+
+block2:
+; CHECK: movabsq $1
+; CHECK: cvtsi2sdq {{.*}} %xmm0
+; CHECK: movb $1, %al
+; CHECK: callq _test16callee
+  call void (...)* @test16callee(double 1.000000e+00)
+  ret void
+}
+
+
+declare void @foo() unnamed_addr ssp align 2
+
+; Verify that we don't fold the load into the compare here.  That would move it
+; w.r.t. the call.
+define i32 @test17(i32 *%P) ssp nounwind {
+entry:
+  %tmp = load i32* %P
+  %cmp = icmp ne i32 %tmp, 5
+  call void @foo()
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  ret i32 1
+
+if.else:                                          ; preds = %entry
+  ret i32 2
+; CHECK: test17:
+; CHECK: movl	(%rdi), %eax
+; CHECK: callq _foo
+; CHECK: cmpl	$5, %eax
+; CHECK-NEXT: je 
+}
+
+; Check that 0.0 is materialized using pxor
+define void @test18(float* %p1) {
+  store float 0.0, float* %p1
+  ret void
+; CHECK: test18:
+; CHECK: pxor
+}
+define void @test19(double* %p1) {
+  store double 0.0, double* %p1
+  ret void
+; CHECK: test19:
+; CHECK: pxor
+}
+
+; Check that we fast-isel sret
+%struct.a = type { i64, i64, i64 }
+define void @test20() nounwind ssp {
+entry:
+  %tmp = alloca %struct.a, align 8
+  call void @test20sret(%struct.a* sret %tmp)
+  ret void
+; CHECK: test20:
+; CHECK: leaq (%rsp), %rdi
+; CHECK: callq _test20sret
+}
+declare void @test20sret(%struct.a* sret)
+
+; Check that -0.0 is not materialized using pxor
+define void @test21(double* %p1) {
+  store double -0.0, double* %p1
+  ret void
+; CHECK: test21:
+; CHECK-NOT: pxor
+; CHECK: movsd	LCPI
+}
+\ No newline at end of file
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 56aeb3a..19972f7 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -relocation-model=pic < %s
+; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
 ; CHECK: test0:
@@ -31,3 +31,18 @@ define i32 @test2() nounwind {
   %t = load i32* @HHH
   ret i32 %t
 }
+
+; Check that we fast-isel sret, and handle the callee-pops behavior correctly.
+%struct.a = type { i64, i64, i64 }
+define void @test3() nounwind ssp {
+entry:
+  %tmp = alloca %struct.a, align 8
+  call void @test3sret(%struct.a* sret %tmp)
+  ret void
+; CHECK: test3:
+; CHECK: subl $44
+; CHECK: leal 16(%esp)
+; CHECK: calll _test3sret
+; CHECK: addl $40
+}
+declare void @test3sret(%struct.a* sret)
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 177c06b..5a1d213 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -20,6 +20,7 @@ fast:
   %t6 = add i32 %t5, 2
   %t7 = getelementptr i32* %y, i32 1
   %t8 = getelementptr i32* %t7, i32 %t6
+  call void asm sideeffect "hello world", ""()
   br label %exit
 
 exit:
@@ -92,3 +93,13 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
   store i1 %t, i1* %q
   ret void
 }
+
+
+@crash_test1x = external global <2 x i32>, align 8
+
+define void @crash_test1() nounwind ssp {
+  %tmp = load <2 x i32>* @crash_test1x, align 8
+  %neg = xor <2 x i32> %tmp, <i32 -1, i32 -1>
+  ret void
+}
+
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 0351eca..5614c80 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86            | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT: lea
 
 @B = external global [1000 x i8], align 32
 @A = external global [1000 x i8], align 32
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index e5be58e..647bbdb 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,11 +1,21 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | grep orps | grep CPI0_2  | count 2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
 
 ; This testcase shouldn't need to spill the -1 value,
 ; so it should just use pcmpeqd to materialize an all-ones vector.
 ; For i386, cp load of -1 are folded.
 
+; With -regalloc=greedy, the live range is split before spilling, so the first
+; pcmpeq doesn't get folded as a constant pool load.
+
+; I386-NOT: pcmpeqd
+; I386: orps LCPI0_2, %xmm
+; I386-NOT: pcmpeqd
+; I386: orps LCPI0_2, %xmm
+
+; X86-64: pcmpeqd
+; X86-64-NOT: pcmpeqd
+
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
 	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
 	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 49f8795..9f8d990 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,10 +1,20 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
 
-; This testcase should need to spill the -1 value on x86-32,
+; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
 ; should use a constant-pool load instead.
 
+; Constant pool all-ones vector:
+; CHECK: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+; CHECK-NEXT: .long 4294967295
+
+; No pcmpeqd instructions, everybody uses the constant pool.
+; CHECK: program_1:
+; CHECK-NOT: pcmpeqd
+
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
 	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
 	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
@@ -57,6 +67,7 @@ forbody:		; preds = %forcond
 	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
 	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
 	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/fold-zext-trunc.ll b/test/CodeGen/X86/fold-zext-trunc.ll
new file mode 100644
index 0000000..f901ad2
--- /dev/null
+++ b/test/CodeGen/X86/fold-zext-trunc.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+; PR9055
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-linux-gnu"
+
+%struct.S0 = type { i32, [2 x i8], [2 x i8], [4 x i8] }
+
+@g_98 = common global %struct.S0 zeroinitializer, align 4
+
+define void @foo() nounwind {
+; CHECK: movzbl
+; CHECK-NOT: movzbl
+; CHECK: calll
+entry:
+  %tmp17 = load i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
+  %tmp54 = zext i8 %tmp17 to i32
+  %foo = load i32* bitcast (i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
+  %conv.i = trunc i32 %foo to i8
+  tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind
+  ret void
+}
+
+declare void @func_12(i32, i8 zeroext)
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index b216914..f3998b6 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | grep {fucompi.*st.\[12\]}
+; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s
 ; PR1012
 
 define float @foo(float* %col.2.0) {
-        %tmp = load float* %col.2.0             ; <float> [#uses=3]
-        %tmp16 = fcmp olt float %tmp, 0.000000e+00              ; <i1> [#uses=1]
-        %tmp20 = fsub float -0.000000e+00, %tmp          ; <float> [#uses=1]
-        %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp         ; <float> [#uses=1]
-        ret float %iftmp.2.0
+; CHECK: fucompi
+  %tmp = load float* %col.2.0
+  %tmp16 = fcmp olt float %tmp, 0.000000e+00
+  %tmp20 = fsub float -0.000000e+00, %tmp
+  %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
+  ret float %iftmp.2.0
 }
-
diff --git a/test/CodeGen/X86/fp-trunc.ll b/test/CodeGen/X86/fp-trunc.ll
new file mode 100644
index 0000000..170637a
--- /dev/null
+++ b/test/CodeGen/X86/fp-trunc.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
+
+define <1 x float> @test1(<1 x double> %x) nounwind {
+; CHECK: cvtsd2ss
+; CHECK: ret
+  %y = fptrunc <1 x double> %x to <1 x float>
+  ret <1 x float> %y
+}
+
+
+define <2 x float> @test2(<2 x double> %x) nounwind {
+; FIXME: It would be nice if this compiled down to a cvtpd2ps
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: ret
+  %y = fptrunc <2 x double> %x to <2 x float>
+  ret <2 x float> %y
+}
+
+define <8 x float> @test3(<8 x double> %x) nounwind {
+; FIXME: It would be nice if this compiled down to a series of cvtpd2ps
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: cvtsd2ss
+; CHECK: ret
+  %y = fptrunc <8 x double> %x to <8 x float>
+  ret <8 x float> %y
+}
+
+
diff --git a/test/CodeGen/X86/global-sections-tls.ll b/test/CodeGen/X86/global-sections-tls.ll
index 2c23030..d5409a5 100644
--- a/test/CodeGen/X86/global-sections-tls.ll
+++ b/test/CodeGen/X86/global-sections-tls.ll
@@ -2,7 +2,7 @@
 
 ; PR4639
 @G1 = internal thread_local global i32 0		; <i32*> [#uses=1]
-; LINUX: .section		.tbss,"awT",@nobits
+; LINUX: .section	.tbss,"awT",@nobits
 ; LINUX: G1:
 
 
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 39a69e1..d0a1b4d 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -99,7 +99,7 @@
 ; DARWIN: _G7:
 ; DARWIN:	.asciz	"abcdefghi"
 
-; LINUX:	.section		.rodata.str1.1,"aMS",@progbits,1
+; LINUX:	.section	.rodata.str1.1,"aMS",@progbits,1
 ; LINUX:	.globl G7
 ; LINUX: G7:
 ; LINUX:	.asciz	"abcdefghi"
@@ -114,7 +114,7 @@
 ; DARWIN:	.globl _G8
 ; DARWIN: _G8:
 
-; LINUX:	.section		.rodata.str2.2,"aMS",@progbits,2
+; LINUX:	.section	.rodata.str2.2,"aMS",@progbits,2
 ; LINUX:	.globl G8
 ; LINUX:G8:
 
@@ -123,7 +123,7 @@
 ; DARWIN:	.globl _G9
 ; DARWIN: _G9:
 
-; LINUX:	.section		.rodata.str4.4,"aMS",@progbits,4
+; LINUX:	.section	.rodata.str4.4,"aMS",@progbits,4
 ; LINUX:	.globl G9
 ; LINUX:G9
 
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index d30e6b3..0adb2b1 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -1,9 +1,29 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep mov %t | count 6
-; RUN: grep {movb	%ah, (%rsi)} %t | count 3
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep mov %t | count 3
-; RUN: grep {movb	%ah, (%e} %t | count 3
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64:      mov
+; X64-NEXT: movb %ah, (%rsi)
+; X64-NOT:      mov
+
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+; W64:      movb %ch, (%rdx)
+; W64-NOT:      mov
+
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
+; X32:      movb %ah, (%e
+; X32-NOT:      mov
 
 ; Use h-register extract and store.
 
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index e84bb9a..cdc75af 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X86-32
 
 ; Use h registers. On x86-64, codegen doesn't support general allocation
@@ -9,6 +10,12 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrq $8, %rdi
 ; X86-64: incb %dil
 
+; See FIXME: on regclass GR8.
+; It could be optimally transformed like; incb %ch; movb %ch, (%rdx)
+; WIN64:  bar64:
+; WIN64:  shrq $8, %rcx
+; WIN64:  incb %cl
+
 ; X86-32: bar64:
 ; X86-32: incb %ah
   %t0 = lshr i64 %x, 8
@@ -23,6 +30,10 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
+; WIN64:  bar32:
+; WIN64:  shrl $8, %ecx
+; WIN64:  incb %cl
+
 ; X86-32: bar32:
 ; X86-32: incb %ah
   %t0 = lshr i32 %x, 8
@@ -37,6 +48,10 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
+; WIN64:  bar16:
+; WIN64:  shrl $8, %ecx
+; WIN64:  incb %cl
+
 ; X86-32: bar16:
 ; X86-32: incb %ah
   %t0 = lshr i16 %x, 8
@@ -51,6 +66,9 @@ define i64 @qux64(i64 inreg %x) nounwind {
 ; X86-64: movq %rdi, %rax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux64:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux64:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i64 %x, 8
@@ -63,6 +81,9 @@ define i32 @qux32(i32 inreg %x) nounwind {
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux32:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux32:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i32 %x, 8
@@ -75,6 +96,9 @@ define i16 @qux16(i16 inreg %x) nounwind {
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
+; WIN64:  qux16:
+; WIN64:  movzbl %ch, %eax
+
 ; X86-32: qux16:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i16 %x, 8
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index e97ebab..402cdfe 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mtriple=x86_64-linux > %t
 ; RUN: grep {movzbl	%\[abcd\]h,} %t | count 8
 ; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
 
diff --git a/test/CodeGen/X86/hidden-vis-pic.ll b/test/CodeGen/X86/hidden-vis-pic.ll
index ba130a2..217dba6 100644
--- a/test/CodeGen/X86/hidden-vis-pic.ll
+++ b/test/CodeGen/X86/hidden-vis-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
 
 
 
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index 847e209..dce12ae 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,5 +1,9 @@
-; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+; RUN: llc < %s -mtriple=x86_64-linux   | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32   | FileCheck %s -check-prefix=X64
+; X64: movq ({{%rsi|%rdx}}), %r
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; X32: movsd (%eax), %xmm
 
 ; Uses movsd to load / store i64 values if sse2 is available.
 
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index 6a79ee8..a8ba015 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,12 +1,11 @@
 ; RUN: llc < %s -march=x86-64 -stats  |& \
-; RUN:   grep {6 .*Number of machine instrs printed}
+; RUN:   grep {5 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something at least as good as:
-;;       movl %edi, %eax
-;;       sarl $31, %eax
-;;       addl %eax, %edi
-;;       xorl %eax, %edi
-;;       movl %edi, %eax
+;;       movl %edi, %ecx
+;;       sarl $31, %ecx
+;;       leal (%rdi,%rcx), %eax
+;;       xorl %ecx, %eax
 ;;       ret
 define i32 @test(i32 %a) nounwind {
         %tmp1neg = sub i32 0, %a
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
index 8d3d97a..7012cce 100644
--- a/test/CodeGen/X86/isel-sink3.ll
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s | grep {addl.\$4, %ecx}
-; RUN: llc < %s | not grep leal
+; RUN: llc < %s | FileCheck %s
 ; this should not sink %1 into bb1, that would increase reg pressure.
 
 ; rdar://6399178
 
+; CHECK: addl $4,
+; CHECK-NOT: leal
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index 44413d6..c439ee1 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,17 +1,20 @@
-; RUN: llc < %s -march=x86-64 | grep {leal	(%rdi,%rdi,2), %eax}
-define i32 @test(i32 %a) {
-        %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1]
-        ret i32 %tmp2
-}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
-; RUN: llc < %s -march=x86-64 | grep {leaq	(,%rdi,4), %rax}
+; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax
 define i64 @test2(i64 %a) {
         %tmp2 = shl i64 %a, 2
 	%tmp3 = or i64 %tmp2, %a
         ret i64 %tmp3
 }
 
-;; TODO!  LEA instead of shift + copy.
+; CHECK: leal ([[A0]],[[A0]],2), %eax
+define i32 @test(i32 %a) {
+        %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1]
+        ret i32 %tmp2
+}
+
+; CHECK: leaq (,[[A0]],8), %rax
 define i64 @test3(i64 %a) {
         %tmp2 = shl i64 %a, 3
         ret i64 %tmp2
diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll
index 03468e2..2d10fbc 100644
--- a/test/CodeGen/X86/lock-inst-encoding.ll
+++ b/test/CodeGen/X86/lock-inst-encoding.ll
@@ -4,10 +4,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 ; CHECK: f0:
-; CHECK: addq %rax, (%rdi)
-; CHECK: # encoding: [0xf0,0x48,0x01,0x07]
+; CHECK: addq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,
 ; CHECK: ret
-define void @f0(i64* %a0) {
+define void @f0(i64* %a0) nounwind {
   %t0 = and i64 1, 1
   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind
   %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 6556fde..32e7879 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -4,10 +4,10 @@
 ; By starting the IV at -64 instead of 0, a cmp is eliminated,
 ; as the flags from the add can be used directly.
 
-; STATIC: movl    $-64, %ecx
+; STATIC: movl    $-64, [[ECX:%e..]]
 
-; STATIC: movl    %eax, _state+76(%ecx)
-; STATIC: addl    $16, %ecx
+; STATIC: movl    [[EAX:%e..]], _state+76([[ECX]])
+; STATIC: addl    $16, [[ECX]]
 ; STATIC: jne
 
 ; In PIC mode the symbol can't be folded, so the change-compare-stride
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
index 4b7050b..d1de051 100644
--- a/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu -asm-verbose=0 | FileCheck %s
 
 ; The inner loop should require only one add (and no leas either).
 ; rdar://8100380
 
-; CHECK:      BB0_4:
+; CHECK:      BB0_3:
 ; CHECK-NEXT:   movb    $0, flags(%rdx)
 ; CHECK-NEXT:   addq    %rcx, %rdx
 ; CHECK-NEXT:   cmpq    $8192, %rdx
diff --git a/test/CodeGen/X86/lsr-quadratic-expand.ll b/test/CodeGen/X86/lsr-quadratic-expand.ll
new file mode 100644
index 0000000..2bbb470
--- /dev/null
+++ b/test/CodeGen/X86/lsr-quadratic-expand.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86-64 < %s
+
+define void @dw2102_i2c_transfer() nounwind {
+entry:
+  br label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %z = phi i64 [ 0, %entry ], [ %z3, %bb ]
+  %z1 = phi i16 [ undef, %entry ], [ %z6, %bb ]
+  %z2 = phi i32 [ 0, %entry ], [ %z8, %bb ]
+  %z3 = add i64 %z, 1
+  %z4 = zext i16 %z1 to i32
+  %z5 = add nsw i32 %z4, %z2
+  %z6 = trunc i32 %z5 to i16
+  call fastcc void @dw210x_op_rw(i16 zeroext %z6)
+  %z7 = getelementptr i8* null, i64 %z
+  store i8 undef, i8* %z7, align 1
+  %z8 = add nsw i32 %z2, 1
+  br label %bb
+}
+
+declare fastcc void @dw210x_op_rw(i16 zeroext) nounwind
diff --git a/test/CodeGen/X86/lsr-redundant-addressing.ll b/test/CodeGen/X86/lsr-redundant-addressing.ll
new file mode 100644
index 0000000..aaa1426
--- /dev/null
+++ b/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=x86-64 < %s | fgrep {addq	$-16,} | count 1
+; rdar://9081094
+
+; LSR shouldn't create lots of redundant address computations.
+
+%0 = type { i32, [3 x i32] }
+%1 = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
+
+@pgm = external hidden unnamed_addr global [5 x %0], align 32
+@isa = external hidden unnamed_addr constant [13 x %1], align 32
+
+define void @main_bb.i() nounwind {
+bb:
+  br label %bb38
+
+bb38:                                             ; preds = %bb200, %bb
+  %tmp39 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb ]
+  %tmp40 = sub i64 0, %tmp39
+  %tmp47 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0
+  %tmp34 = load i32* %tmp47, align 16
+  %tmp203 = icmp slt i32 %tmp34, 12
+  br i1 %tmp203, label %bb215, label %bb200
+
+bb200:                                            ; preds = %bb38
+  %tmp201 = add i64 %tmp39, 1
+  br label %bb38
+
+bb215:                                            ; preds = %bb38
+  %tmp50 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 2
+  %tmp49 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 1
+  %tmp48 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 0
+  %tmp216 = add nsw i32 %tmp34, 1
+  store i32 %tmp216, i32* %tmp47, align 16
+  %tmp217 = sext i32 %tmp216 to i64
+  %tmp218 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0
+  %tmp219 = load i32* %tmp218, align 8
+  store i32 %tmp219, i32* %tmp48, align 4
+  %tmp220 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1
+  %tmp221 = load i32* %tmp220, align 4
+  store i32 %tmp221, i32* %tmp49, align 4
+  %tmp222 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2
+  %tmp223 = load i32* %tmp222, align 8
+  store i32 %tmp223, i32* %tmp50, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 29f03d6..4770519 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -4,8 +4,9 @@
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
-; CHECK: movaps        (%{{rsi|rdx}},%rax,4), %xmm3
-; CHECK: movaps        %xmm3, (%{{rdi|rcx}},%rax,4)
+; CHECK: movaps        (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
+; CHECK: movaps
+; CHECK:        [[X3]], (%{{rdi|rcx}},%rax,4)
 ; CHECK: addq  $4, %rax
 ; CHECK: cmpl  %eax, (%{{rdx|r8}})
 ; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 2a97629..527a5a6 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
 ; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
 target datalayout = "e-p:64:64:64"
 target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index e284776..d819fc8 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -6,11 +6,11 @@
 %struct.s2 = type { i32, i8*, i8*, [256 x %struct.s1*], [8 x i32], i64, i8*, i32, i64, i64, i32, %struct.s3*, %struct.s3*, [49 x i64] }
 %struct.s3 = type { %struct.s3*, %struct.s3*, i32, i32, i32 }
 
-define fastcc i8* @t(i64 %size) nounwind {
+define fastcc i8* @t(i32 %base) nounwind {
 entry:
 ; CHECK: t:
 ; CHECK: leaq (%rax,%rax,4)
-  %0 = zext i32 undef to i64
+  %0 = zext i32 %base to i64
   %1 = getelementptr inbounds %struct.s2* null, i64 %0
   br i1 undef, label %bb1, label %bb2
 
diff --git a/test/CodeGen/X86/mcinst-lowering-cmp0.ll b/test/CodeGen/X86/mcinst-lowering-cmp0.ll
deleted file mode 100644
index 756be1f..0000000
--- a/test/CodeGen/X86/mcinst-lowering-cmp0.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: llc --show-mc-encoding -relocation-model=pic -disable-fp-elim -O3 < %s | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin10.0.0"
-
-%struct.NSConstantString = type { i32*, i32, i8*, i32 }
-%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
-%struct._objc_symtab = type { i32, i8*, i16, i16, [0 x i8*] }
-
-@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
-@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=3]
-@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
-@.str = private constant [3 x i8] c"||\00"        ; <[3 x i8]*> [#uses=1]
-@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 2 }, section "__DATA,__cfstring" ; <%struct.NSConstantString*> [#uses=1]
-@"\01L_OBJC_METH_VAR_NAME_1" = internal global [5 x i8] c"baz:\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[5 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_2" = internal global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=2]
-@"\01L_OBJC_METH_VAR_NAME_3" = internal global [4 x i8] c"bar\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1]
-@"\01L_OBJC_SELECTOR_REFERENCES_4" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; <i8**> [#uses=2]
-@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [9 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" to i8*), i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata" ; <[9 x i8*]*> [#uses=0]
-
-define void @f0(i8* nocapture %a, i8* nocapture %b) nounwind optsize ssp {
-entry:
-  %call = tail call i32 (...)* @get_name() nounwind optsize ; <i32> [#uses=2]
-  %conv = inttoptr i32 %call to i8*               ; <i8*> [#uses=1]
-  %call1 = tail call i32 (...)* @get_dict() nounwind optsize ; <i32> [#uses=2]
-  %conv2 = inttoptr i32 %call1 to i8*             ; <i8*> [#uses=2]
-
-; Check that we lower to the short form of cmpl, which has an 8-bit immediate.
-;
-; CHECK: cmpl  $0, -16(%ebp)           ## 4-byte Folded Reload
-; CHECK:                               ## encoding: [0x83,0x7d,0xf0,0x00]
-; rdar://7999130
-  %cmp = icmp eq i32 %call1, 0                    ; <i1> [#uses=1]
-  br i1 %cmp, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; <i8*> [#uses=1]
-  %call6 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp5) nounwind optsize ; <i8*> [#uses=1]
-  %tmp7 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" ; <i8*> [#uses=1]
-  %call820 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6, i8* %tmp7, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*)) nounwind optsize ; <i8*> [#uses=0]
-  br label %if.end
-
-if.end:                                           ; preds = %entry, %if.then
-  %tmp10 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; <i8*> [#uses=1]
-  %call11 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp10) nounwind optsize ; <i8*> [#uses=1]
-  %tmp12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" ; <i8*> [#uses=1]
-  %call13 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call11, i8* %tmp12) nounwind optsize ; <i8*> [#uses=0]
-  %cmp15 = icmp eq i32 %call, 0                   ; <i1> [#uses=1]
-  br i1 %cmp15, label %if.end19, label %if.then17
-
-if.then17:                                        ; preds = %if.end
-  tail call void (...)* @f1(i8* %conv) nounwind optsize
-  ret void
-
-if.end19:                                         ; preds = %if.end
-  ret void
-}
-
-declare i32 @get_name(...) optsize
-
-declare i32 @get_dict(...) optsize
-
-declare i8* @objc_msgSend(i8*, i8*, ...)
-
-declare void @f1(...) optsize
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 3607043..37787556 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
-; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+; RUN: llc < %s -mtriple=x86_64-linux   | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32   | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
 ; increases the places that need to use emms.
-
+; CHECK-NOT: %mm
+; CHECK-NOT: emms
 ; rdar://5741668
 
 define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
diff --git a/test/CodeGen/X86/narrow-shl-cst.ll b/test/CodeGen/X86/narrow-shl-cst.ll
new file mode 100644
index 0000000..a404f34
--- /dev/null
+++ b/test/CodeGen/X86/narrow-shl-cst.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR5039
+
+define i32 @test1(i32 %x) nounwind {
+  %and = shl i32 %x, 10
+  %shl = and i32 %and, 31744
+  ret i32 %shl
+; CHECK: test1:
+; CHECK: andl $31
+; CHECK: shll $10
+}
+
+define i32 @test2(i32 %x) nounwind {
+  %or = shl i32 %x, 10
+  %shl = or i32 %or, 31744
+  ret i32 %shl
+; CHECK: test2:
+; CHECK: orl $31
+; CHECK: shll $10
+}
+
+define i32 @test3(i32 %x) nounwind {
+  %xor = shl i32 %x, 10
+  %shl = xor i32 %xor, 31744
+  ret i32 %shl
+; CHECK: test3:
+; CHECK: xorl $31
+; CHECK: shll $10
+}
+
+define i64 @test4(i64 %x) nounwind {
+  %and = shl i64 %x, 40
+  %shl = and i64 %and, 264982302294016
+  ret i64 %shl
+; CHECK: test4:
+; CHECK: andq $241
+; CHECK: shlq $40
+}
+
+define i64 @test5(i64 %x) nounwind {
+  %and = shl i64 %x, 40
+  %shl = and i64 %and, 34084860461056
+  ret i64 %shl
+; CHECK: test5:
+; CHECK: andq $31
+; CHECK: shlq $40
+}
+
+define i64 @test6(i64 %x) nounwind {
+  %and = shl i64 %x, 32
+  %shl = and i64 %and, -281474976710656
+  ret i64 %shl
+; CHECK: test6:
+; CHECK: andq $-65536
+; CHECK: shlq $32
+}
+
+define i64 @test7(i64 %x) nounwind {
+  %or = shl i64 %x, 40
+  %shl = or i64 %or, 264982302294016
+  ret i64 %shl
+; CHECK: test7:
+; CHECK: orq $241
+; CHECK: shlq $40
+}
+
+define i64 @test8(i64 %x) nounwind {
+  %or = shl i64 %x, 40
+  %shl = or i64 %or, 34084860461056
+  ret i64 %shl
+; CHECK: test8:
+; CHECK: orq $31
+; CHECK: shlq $40
+}
+
+define i64 @test9(i64 %x) nounwind {
+  %xor = shl i64 %x, 40
+  %shl = xor i64 %xor, 264982302294016
+  ret i64 %shl
+; CHECK: test9:
+; CHECK: orq $241
+; CHECK: shlq $40
+}
+
+define i64 @test10(i64 %x) nounwind {
+  %xor = shl i64 %x, 40
+  %shl = xor i64 %xor, 34084860461056
+  ret i64 %shl
+; CHECK: test10:
+; CHECK: xorq $31
+; CHECK: shlq $40
+}
+
+define i64 @test11(i64 %x) nounwind {
+  %xor = shl i64 %x, 33
+  %shl = xor i64 %xor, -562949953421312
+  ret i64 %shl
+; CHECK: test11:
+; CHECK: xorq $-65536
+; CHECK: shlq $33
+}
diff --git a/test/CodeGen/X86/no-cfi.ll b/test/CodeGen/X86/no-cfi.ll
new file mode 100644
index 0000000..f9985d4
--- /dev/null
+++ b/test/CodeGen/X86/no-cfi.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi | FileCheck --check-prefix=STATIC %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi -relocation-model=pic | FileCheck --check-prefix=PIC %s
+
+; STATIC:      .ascii   "zPLR"
+; STATIC:      .byte   3
+; STATIC-NEXT: .long   __gxx_personality_v0
+; STATIC-NEXT: .byte   3
+; STATIC-NEXT: .byte   3
+
+; PIC:      .ascii   "zPLR"
+; PIC:      .byte   155
+; PIC-NEXT: .L
+; PIC-NEXT: .long   DW.ref.__gxx_personality_v0-.L
+; PIC-NEXT: .byte   27
+; PIC-NEXT: .byte   27
+
+
+define void @bar() {
+entry:
+  %call = invoke i32 @foo()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret void
+
+lpad:
+  %exn = call i8* @llvm.eh.exception() nounwind
+  %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind
+  ret void
+}
+
+declare i32 @foo()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index b90413d..e35eb70 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -45,8 +45,8 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-NEXT:         align
 ; CHECK-NEXT: BB1_2:
 ; CHECK-NEXT:         callq
-; CHECK-NEXT:         incl    [[BX:%ebx|%esi]]
-; CHECK-NEXT:         cmpl    [[R14:%r14d|%edi]], [[BX]]
+; CHECK-NEXT:         incl    [[BX:%[a-z0-9]+]]
+; CHECK-NEXT:         cmpl    [[R14:%[a-z0-9]+]], [[BX]]
 ; CHECK-NEXT:         movq    %rax, %r{{di|cx}}
 ; CHECK-NEXT:         jl
 
diff --git a/test/CodeGen/X86/or-address.ll b/test/CodeGen/X86/or-address.ll
index 6447680..b3fc627 100644
--- a/test/CodeGen/X86/or-address.ll
+++ b/test/CodeGen/X86/or-address.ll
@@ -4,10 +4,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.3"
 
 
-; CHECK: 	movl	%{{.*}},   (%rdi,%rdx,4)
-; CHECK:	movl	%{{.*}},  8(%rdi,%rdx,4)
-; CHECK:	movl	%{{.*}},  4(%rdi,%rdx,4)
-; CHECK:	movl	%{{.*}}, 12(%rdi,%rdx,4)
+; CHECK: 	movl	%{{.*}},   (%rdi,[[R0:.+]],4)
+; CHECK:	movl	%{{.*}},  8(%rdi,[[R0]],4)
+; CHECK:	movl	%{{.*}},  4(%rdi,[[R0]],4)
+; CHECK:	movl	%{{.*}}, 12(%rdi,[[R0]],4)
 
 define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone {
 bb.nph:
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index e4ab2b5..606a9be 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd	\$3, %xmm0, %xmm0}
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse41 | FileCheck %s
+; CHECK: pshufd $3, %xmm0, %xmm0
+
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse41 | FileCheck %s -check-prefix=WIN64
+; %a is passed indirectly on Win64.
+; WIN64: movss   12(%rcx), %xmm0
 
 define float @foo(<8 x float> %a) nounwind {
   %c = extractelement <8 x float> %a, i32 3
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
index 6789bb0..e952a9b 100644
--- a/test/CodeGen/X86/personality.ll
+++ b/test/CodeGen/X86/personality.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -disable-cfi -mtriple=x86_64-apple-darwin9 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -disable-cfi -mtriple=i386-apple-darwin9 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
 ; PR1632
 
 define void @_Z1fv() {
@@ -38,13 +38,15 @@ declare void @__gxx_personality_v0()
 
 declare void @__cxa_end_catch()
 
-; X64: Leh_frame_common_begin0:
-; X64: .long	___gxx_personality_v0@GOTPCREL+4
+; X64:      zPLR
+; X64:      .byte 155
+; X64-NEXT: .long	___gxx_personality_v0@GOTPCREL+4
 
-; X32: Leh_frame_common_begin0:
-; X32: .long	L___gxx_personality_v0$non_lazy_ptr-
-; ....
+; X32:        .section	__IMPORT,__pointers,non_lazy_symbol_pointers
+; X32-NEXT: L___gxx_personality_v0$non_lazy_ptr:
+; X32-NEXT:   .indirect_symbol ___gxx_personality_v0
 
-; X32: .section	__IMPORT,__pointers,non_lazy_symbol_pointers
-; X32: L___gxx_personality_v0$non_lazy_ptr:
-; X32:   .indirect_symbol ___gxx_personality_v0
+; X32:      zPLR
+; X32:      .byte 155
+; X32-NEXT: :
+; X32-NEXT: .long	L___gxx_personality_v0$non_lazy_ptr-
diff --git a/test/CodeGen/X86/phi-constants.ll b/test/CodeGen/X86/phi-bit-propagation.ll
index da9652f..94c9722 100644
--- a/test/CodeGen/X86/phi-constants.ll
+++ b/test/CodeGen/X86/phi-bit-propagation.ll
@@ -33,3 +33,23 @@ return:                                           ; preds = %for.body, %for.cond
   %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ]
   ret i1 %retval.0
 }
+
+; This test case caused an assertion failure; see PR9324.
+define void @func_37() noreturn nounwind ssp {
+entry:
+  br i1 undef, label %lbl_919, label %entry.for.inc_crit_edge
+
+entry.for.inc_crit_edge:                          ; preds = %entry
+  br label %for.inc
+
+lbl_919:                                          ; preds = %for.cond7.preheader, %entry
+  br label %for.cond7.preheader
+
+for.cond7.preheader:                              ; preds = %for.inc, %lbl_919
+  %storemerge.ph = phi i8 [ 0, %lbl_919 ], [ %add, %for.inc ]
+  br i1 undef, label %for.inc, label %lbl_919
+
+for.inc:                                          ; preds = %for.cond7.preheader, %entry.for.inc_crit_edge
+  %add = add i8 undef, 1
+  br label %for.cond7.preheader
+}
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index dc5fcd7..fb60ac2 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -79,8 +79,8 @@ entry:
 ; LINUX-NEXT: .L3$pb:
 ; LINUX: 	popl
 ; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
-; LINUX: 	movl	pfoo@GOT(%[[REG3]]),
 ; LINUX: 	calll	afoo@PLT
+; LINUX: 	movl	pfoo@GOT(%[[REG3]]),
 ; LINUX: 	calll	*
 }
 
diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll
index 3ef5941..be527ae 100644
--- a/test/CodeGen/X86/pmulld.ll
+++ b/test/CodeGen/X86/pmulld.ll
@@ -1,8 +1,13 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test1:
 ; CHECK-NEXT: pmulld
+
+; WIN64: test1:
+; WIN64-NEXT: movdqa  (%rcx), %xmm0
+; WIN64-NEXT: pmulld  (%rdx), %xmm0
   %C = mul <4 x i32> %A, %B
   ret <4 x i32> %C
 }
@@ -10,6 +15,11 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
 ; CHECK: test1a:
 ; CHECK-NEXT: pmulld
+
+; WIN64: test1a:
+; WIN64-NEXT: movdqa  (%rcx), %xmm0
+; WIN64-NEXT: pmulld  (%rdx), %xmm0
+
   %B = load <4 x i32>* %Bp
   %C = mul <4 x i32> %A, %B
   ret <4 x i32> %C
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 902c69b..48c48ae 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-64
 
 ; MachineLICM should be able to hoist loop invariant reload out of the loop.
+; Only linear scan needs this, -regalloc=greedy sinks the spill instead.
 ; rdar://7233099
 
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -68,10 +69,12 @@ bb26.preheader:                                   ; preds = %imix_test.exit
 
 bb23:                                             ; preds = %imix_test.exit
   unreachable
+; Verify that there are no loads inside the loop.
 ; X86-32: %bb26.preheader
-; X86-32: movl -16(%ebp),
-; X86-32-NEXT: .align 4
-; X86-32-NEXT: %bb28
+; X86-32: .align 4
+; X86-32-NOT: (%esp),
+; X86-32-NOT: (%ebp),
+; X86-32: jmp
 
 bb28:                                             ; preds = %bb28, %bb26.preheader
   %counter.035 = phi i32 [ %3, %bb28 ], [ 0, %bb26.preheader ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index 54d043d..ef0f9ea 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -18,7 +18,8 @@ forcond.preheader:              ; preds = %entry
 ; CHECK: movl $1
 ; CHECK-NOT: xorl
 ; CHECK-NOT: movl
-; CHECK-NEXT: je
+; CHECK-NOT: LBB
+; CHECK: je
 
 ifthen:         ; preds = %entry
   ret i32 0
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
index f813e2e..1127b60 100644
--- a/test/CodeGen/X86/pr3366.ll
+++ b/test/CodeGen/X86/pr3366.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep movzbl
+; RUN: llc < %s -march=x86 -disable-cgp-branch-opts | grep movzbl
 ; PR3366
 
 define void @_ada_c34002a() nounwind {
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
index 98c064a..a4204e5 100644
--- a/test/CodeGen/X86/pr3495-2.ll
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of loads added} | grep 1
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
 ; PR3495
 ;
 ; This test may not be testing what it was supposed to test.
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index e84a84f..c612a6e 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
-; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
-; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of loads added} | grep 2
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of register spills} | grep 1
+; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of machine instrs printed} | grep 34
 ; PR3495
 
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pr9743.ll b/test/CodeGen/X86/pr9743.ll
new file mode 100644
index 0000000..8feccd9
--- /dev/null
+++ b/test/CodeGen/X86/pr9743.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-fp-elim -asm-verbose=0 | FileCheck %s
+
+define void @f() {
+  ret void
+}
+
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  pushq
+; CHECK-NEXT: :
+; CHECK-NEXT:  .cfi_def_cfa_offset 16
+; CHECK-NEXT: :
+; CHECK-NEXT:  .cfi_offset 6, -16
+; CHECK-NEXT:  movq    %rsp, %rbp
+; CHECK-NEXT: :
+; CHECK-NEXT:  .cfi_def_cfa_register 6
+; CHECK-NEXT:  popq    %rbp
+; CHECK-NEXT:  ret
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
index e89b507..b55bf57 100644
--- a/test/CodeGen/X86/pre-split1.ll
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 ; XFAIL: *
 
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
index db039bd..83c6450 100644
--- a/test/CodeGen/X86/pre-split10.ll
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
 entry:
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
index 0a9f4e3..3d549f9 100644
--- a/test/CodeGen/X86/pre-split11.ll
+++ b/test/CodeGen/X86/pre-split11.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | FileCheck %s
 
 @.str = private constant [28 x i8] c"\0A\0ADOUBLE            D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
 @.str1 = private constant [37 x i8] c"double to long    l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
index ba902f9..670737b 100644
--- a/test/CodeGen/X86/pre-split2.ll
+++ b/test/CodeGen/X86/pre-split2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
 ; RUN:   grep {pre-alloc-split} | count 2
 
 define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
index 2e31420..0c49a91 100644
--- a/test/CodeGen/X86/pre-split3.ll
+++ b/test/CodeGen/X86/pre-split3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 define i32 @t(i32 %arg) {
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
index 10cef27..37d1ac6 100644
--- a/test/CodeGen/X86/pre-split4.ll
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
index 8def460..9f41f24 100644
--- a/test/CodeGen/X86/pre-split5.ll
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
 
 target triple = "i386-apple-darwin9.5"
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
index 837e238..d8f274d 100644
--- a/test/CodeGen/X86/pre-split6.ll
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split | grep {divsd	24} | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | grep {divsd	24} | count 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
 
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
index 0b81c0b..8c93faa 100644
--- a/test/CodeGen/X86/pre-split7.ll
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
 
 @object_distance = external global double, align 8		; <double*> [#uses=1]
 @axis_slope_angle = external global double, align 8		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index 0684bd0..7e6ad6e1 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index 86dda33..951e6fb 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
 ; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
 
 @current_surfaces.b = external global i1		; <i1*> [#uses=1]
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index 2da96ab..f6f0ed1 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
 ; RUN: not grep xor %t
 ; RUN: not grep movap %t
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
index fe40758..2f90932 100644
--- a/test/CodeGen/X86/scalar-min-max-fill-operand.ll
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -1,6 +1,13 @@
-; RUN: llc < %s -march=x86-64 | grep min | count 1
-; RUN: llc < %s -march=x86-64 | grep max | count 1
-; RUN: llc < %s -march=x86-64 | grep mov | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     mov
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     min
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     mov
+; CHECK-NOT:     {{(min|max|mov)}}
+; CHECK:     max
+; CHECK-NOT:     {{(min|max|mov)}}
 
 declare float @bar()
 
diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll
index 21c418d..574769b 100644
--- a/test/CodeGen/X86/sext-i1.ll
+++ b/test/CodeGen/X86/sext-i1.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=x86 -disable-cgp-branch-opts    | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=x86-64 -disable-cgp-branch-opts | FileCheck %s -check-prefix=64
 ; rdar://7573216
 ; PR6146
 
diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll
new file mode 100644
index 0000000..8d4b07f
--- /dev/null
+++ b/test/CodeGen/X86/shrink-compare.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @bar()
+
+define void @test1(i32* nocapture %X) nounwind {
+entry:
+  %tmp1 = load i32* %X, align 4
+  %and = and i32 %tmp1, 255
+  %cmp = icmp eq i32 %and, 47
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+; CHECK: test1:
+; CHECK: cmpb $47, (%{{rdi|rcx}})
+}
+
+define void @test2(i32 %X) nounwind {
+entry:
+  %and = and i32 %X, 255
+  %cmp = icmp eq i32 %and, 47
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+; CHECK: test2:
+; CHECK: cmpb $47, %{{dil|cl}}
+}
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index b12a87d..8ffd312 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | not grep mov
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     mov
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   %t = load <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index c42f7f0..04f2161 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -march=x86-64 | grep movap | count 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     movapd
+; CHECK:     movaps
+; CHECK-NOT:     movaps
+; CHECK:     movapd
+; CHECK-NOT:     movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   store <4 x float> %x, <4 x float>* %p
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
index 5784481..e55d585 100644
--- a/test/CodeGen/X86/sse-align-7.ll
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep movaps | count 1
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK:     movaps
+; CHECK-NOT:     movaps
 
 define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
   store <2 x i64> %x, <2 x i64>* %p
diff --git a/test/CodeGen/X86/sse-commute.ll b/test/CodeGen/X86/sse-commute.ll
index 38ed644..336bf06 100644
--- a/test/CodeGen/X86/sse-commute.ll
+++ b/test/CodeGen/X86/sse-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
 
 ; Commute the comparison to avoid a move.
 ; PR7500.
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 5c3e32f..70e0a8a 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -178,9 +178,9 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 ; CHECK: test14:
-; CHECK: 	addps	%xmm1, %xmm0
-; CHECK: 	subps	%xmm1, %xmm2
-; CHECK: 	movlhps	%xmm2, %xmm0
+; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 9a60091..8e72f13 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -168,12 +168,12 @@ define internal void @t10() nounwind {
         store <4 x i16> %6, <4 x i16>* @g2, align 8
         ret void
 ; X64: 	t10:
-; X64: 		pextrw	$4, %xmm0, %eax
-; X64: 		unpcklpd %xmm1, %xmm1
-; X64: 		pshuflw	$8, %xmm1, %xmm1
-; X64: 		pinsrw	$2, %eax, %xmm1
-; X64: 		pextrw	$6, %xmm0, %eax
-; X64: 		pinsrw	$3, %eax, %xmm1
+; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %eax
+; X64: 		unpcklpd [[X1:%xmm[0-9]+]]
+; X64: 		pshuflw	$8, [[X1]], [[X1]]
+; X64: 		pinsrw	$2, %eax, [[X1]]
+; X64: 		pextrw	$6, [[X0]], %eax
+; X64: 		pinsrw	$3, %eax, [[X1]]
 }
 
 
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index dc3d6fe..02399c4 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
-; RUN:   grep fail | count 1
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& FileCheck %s
+; CHECK: fail
+; CHECK-NOT: fail
 
 declare float @test_f(float %f)
 declare double @test_d(double %f)
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
index 9778fa1..5728daf 100644
--- a/test/CodeGen/X86/stdarg.ll
+++ b/test/CodeGen/X86/stdarg.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep {testb	\[%\]al, \[%\]al}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK: testb %al, %al
 
 %struct.__va_list_tag = type { i32, i32, i8*, i8* }
 
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index f4847a3..ddf059c 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux               | FileCheck %s
+; CHECK-NOT:     lea
 
 ; P should be sunk into the loop and folded into the address mode. There
 ; shouldn't be any lea instructions inside the loop.
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 5cbd895..1251a24 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86-64 | not grep lea
+; RUN: llc < %s -march=x86            | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
 @A = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 19f4079..4522e91 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {jo} | count 1
-; RUN: llc < %s -march=x86 | grep {jb} | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
@@ -18,6 +17,10 @@ normal:
 overflow:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
+
+; CHECK: func1:
+; CHECK: subl 20(%esp)
+; CHECK-NEXT: jo
 }
 
 define i1 @func2(i32 %v1, i32 %v2) nounwind {
@@ -34,8 +37,23 @@ normal:
 carry:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
+
+; CHECK: func2:
+; CHECK: subl 20(%esp)
+; CHECK-NEXT: jb
 }
 
 declare i32 @printf(i8*, ...) nounwind
 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
 declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
+
+define i1 @func3(i32 %x) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 1)
+  %obit = extractvalue {i32, i1} %t, 1
+  ret i1 %obit
+
+; CHECK: func3:
+; CHECK: decl
+; CHECK-NEXT: seto
+}
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 9291695..77710ad 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -109,15 +109,15 @@ altret:
 
 ; CHECK: dont_merge_oddly:
 ; CHECK-NOT:   ret
-; CHECK:        ucomiss %xmm1, %xmm2
+; CHECK:        ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   jbe .LBB2_3
-; CHECK-NEXT:   ucomiss %xmm0, %xmm1
+; CHECK-NEXT:   ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   ja .LBB2_4
 ; CHECK-NEXT: .LBB2_2:
 ; CHECK-NEXT:   movb $1, %al
 ; CHECK-NEXT:   ret
 ; CHECK-NEXT: .LBB2_3:
-; CHECK-NEXT:   ucomiss %xmm0, %xmm2
+; CHECK-NEXT:   ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   jbe .LBB2_2
 ; CHECK-NEXT: .LBB2_4:
 ; CHECK-NEXT:   xorb %al, %al
@@ -153,16 +153,20 @@ bb30:
 ; an unconditional jump to complete a two-way conditional branch.
 
 ; CHECK: c_expand_expr_stmt:
-; CHECK:        jmp .LBB3_11
-; CHECK-NEXT: .LBB3_9:
-; CHECK-NEXT:   movq 8(%rax), %rax
-; CHECK-NEXT:   xorb %dl, %dl
-; CHECK-NEXT:   movb 16(%rax), %al
-; CHECK-NEXT:   cmpb $16, %al
-; CHECK-NEXT:   je .LBB3_11
-; CHECK-NEXT:   cmpb $23, %al
-; CHECK-NEXT:   jne .LBB3_14
-; CHECK-NEXT: .LBB3_11:
+;
+; This test only works when register allocation happens to use %rax for both
+; load addresses.
+;
+; CHE:        jmp .LBB3_11
+; CHE-NEXT: .LBB3_9:
+; CHE-NEXT:   movq 8(%rax), %rax
+; CHE-NEXT:   xorb %dl, %dl
+; CHE-NEXT:   movb 16(%rax), %al
+; CHE-NEXT:   cmpb $16, %al
+; CHE-NEXT:   je .LBB3_11
+; CHE-NEXT:   cmpb $23, %al
+; CHE-NEXT:   jne .LBB3_14
+; CHE-NEXT: .LBB3_11:
 
 %0 = type { %struct.rtx_def* }
 %struct.lang_decl = type opaque
diff --git a/test/CodeGen/X86/tailcall-returndup-void.ll b/test/CodeGen/X86/tailcall-returndup-void.ll
new file mode 100644
index 0000000..c1d6312
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-returndup-void.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK: rBM_info
+; CHECK-NOT: ret
+
+@sES_closure = external global [0 x i64]
+declare cc10 void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64* noalias nocapture, i64, i64, i64) align 8
+
+define cc10 void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 {
+c263:
+  %ln265 = getelementptr inbounds i64* %Sp_Arg, i64 -2
+  %ln266 = ptrtoint i64* %ln265 to i64
+  %ln268 = icmp ult i64 %ln266, %R3_Arg
+  br i1 %ln268, label %c26a, label %n26p
+
+n26p:                                             ; preds = %c263
+  br i1 icmp ne (i64 and (i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 7), i64 0), label %c1ZP.i, label %n1ZQ.i
+
+n1ZQ.i:                                           ; preds = %n26p
+  %ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
+  %ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)*
+  tail call cc10 void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
+  br label %rBL_info.exit
+
+c1ZP.i:                                           ; preds = %n26p
+  tail call cc10 void @sEH_info(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
+  br label %rBL_info.exit
+
+rBL_info.exit:                                    ; preds = %c1ZP.i, %n1ZQ.i
+  ret void
+
+c26a:                                             ; preds = %c263
+  %ln27h = getelementptr inbounds i64* %Base_Arg, i64 -2
+  %ln27j = load i64* %ln27h, align 8
+  %ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)*
+  tail call cc10 void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7c685b8..1b1efe7 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,15 +1,30 @@
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep TAILCALL
+; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+
+; FIXME: Win64 does not support byval.
+
+; Expect the entry point.
+; CHECK: tailcaller:
+
 ; Expect 2 rep;movs because of tail call byval lowering.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
+; CHECK: rep;
+; CHECK: rep;
+
 ; A sequence of copyto/copyfrom virtual registers is used to deal with byval
 ; lowering appearing after moving arguments to registers. The following two
 ; checks verify that the register allocator changes those sequences to direct
 ; moves to argument register where it can (for registers that are not used in 
 ; byval lowering - not rsi, not rdi, not rcx).
 ; Expect argument 4 to be moved directly to register edx.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {7} | grep edx
+; CHECK: movl $7, %edx
+
 ; Expect argument 6 to be moved directly to register r8.
-; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {17} | grep r8
+; CHECK: movl $17, %r8d
+
+; Expect not call but jmp to @tailcallee.
+; CHECK: jmp tailcallee
+
+; Expect the trailer.
+; CHECK: .size tailcaller
 
 %struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
                    i64, i64, i64, i64, i64, i64, i64, i64,
@@ -25,5 +40,3 @@ entry:
         %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* %a byval, i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
         ret i64 %tmp4
 }
-
-
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 0c732d5..060ce0f 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,19 +2,19 @@
 ; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
-; CHECK: subq  ${{24|88}}, %rsp
+; CHECK: subq  ${{24|72}}, %rsp
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl  [[A1:32|144]](%rsp), %eax
+; CHECK: movl  [[A1:32|144]](%rsp), [[R1:%e..]]
 ; Move param %in1 to temp register (%r10d).
-; CHECK: movl  [[A2:40|152]](%rsp), %r10d
+; CHECK: movl  [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]]
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl {{%edi|%ecx}}, %eax
+; CHECK: addl {{%edi|%ecx}}, [[R1]]
 ; Move param %in2 to stack.
-; CHECK: movl  %r10d, [[A1]](%rsp)
+; CHECK: movl  [[R2]], [[A1]](%rsp)
 ; Move result of addition to stack.
-; CHECK: movl  %eax, [[A2]](%rsp)
+; CHECK: movl  [[R1]], [[A2]](%rsp)
 ; Eventually, do a TAILCALL
 ; CHECK: TAILCALL
 
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index f1063dc..97db1b3 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -2,10 +2,10 @@
 ; rdar://5752025
 
 ; We want:
-;      CHECK: movl	4(%esp), %ecx
-; CHECK-NEXT: andl	$15, %ecx
-; CHECK-NEXT: movl	$42, %eax
-; CHECK-NEXT: cmovel	%ecx, %eax
+;      CHECK: movl	$42, %ecx
+; CHECK-NEXT: movl	4(%esp), %eax
+; CHECK-NEXT: andl	$15, %eax
+; CHECK-NEXT: cmovnel	%ecx, %eax
 ; CHECK-NEXT: ret
 ;
 ; We don't want:
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index ec16dfe..a1d797f 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -34,3 +34,14 @@ entry:
  %add5 = add i32 %add3, %d
  ret i32 %add5
 }
+
+; rdar://9002648
+define i64 @test3(i64 %x) nounwind readnone ssp {
+entry:
+; CHECK: test3:
+; CHECK: leaq (%rdi,%rdi), %rax
+; CHECK-NOT: addq
+; CHECK-NEXT: ret
+  %0 = shl i64 %x, 1
+  ret i64 %0
+}
diff --git a/test/CodeGen/X86/umulo-64.ll b/test/CodeGen/X86/umulo-64.ll
deleted file mode 100644
index 280bd9c..0000000
--- a/test/CodeGen/X86/umulo-64.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin
-
-%0 = type { i64, i1 }
-
-define i32 @f0(i64 %a, i64 %b) nounwind ssp {
-  %1 = alloca i64, align 4
-  %2 = alloca i64, align 4
-  store i64 %a, i64* %1, align 8
-  store i64 %b, i64* %2, align 8
-  %3 = load i64* %1, align 8
-  %4 = load i64* %2, align 8
-  %5 = call %0 @llvm.smul.with.overflow.i64(i64 %3, i64 %4)
-  %6 = extractvalue %0 %5, 0
-  %7 = extractvalue %0 %5, 1
-  br i1 %7, label %8, label %9
-
-; <label>:8                                       ; preds = %0
-  call void @llvm.trap()
-  unreachable
-
-; <label>:9                                       ; preds = %0
-  %10 = trunc i64 %6 to i32
-  ret i32 %10
-}
-
-declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
-
-declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 6a493c0..9f70489 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -29,8 +29,8 @@ return:
 declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
 ; CORE2: .section
-; CORE2: .align  4
+; CORE2: .align  3
 ; CORE2-NEXT: _.str1:
 ; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
-; CORE2: .align 4
+; CORE2: .align 3
 ; CORE2-NEXT: _.str3:
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index 09431b5..b89c473 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -4,16 +4,11 @@
 ; represent this in the debug information. This is done by setting line
 ; and column to 0
 
-;      CHECK:         leal    (%rdi,%rsi), %eax
+;      CHECK:         leal
 ; CHECK-NEXT:         .loc 1 0 0
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT:         cltd
-; CHECK-NEXT:         idivl   %r8d
-; CHECK-NEXT:         .loc 1 4 3
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT:         addl    %ecx, %eax
-; CHECK-NEXT:         ret
-; CHECK-NEXT: Ltmp
+;      CHECK:         cltd
+; CHECK-NEXT:         idivl
+; CHECK-NEXT:         .loc 2 4 3
 
 define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind {
 entry:
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
new file mode 100644
index 0000000..eeebcee
--- /dev/null
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+
+define void @test5() nounwind optsize noinline ssp {
+entry:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
+  %buf = alloca [64 x i8], align 16
+  %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false)
+  br i1 false, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index 76c3fdf..6d14099 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
 ; RUN: llc < %s -mcpu=yonah -march=x86 -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32
 
 ; PR7518
@@ -15,6 +16,13 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
 ; X64-NEXT: movss	%xmm1, (%rdi)
 ; X64-NEXT: ret
 
+; W64: test1:
+; W64-NEXT: movdqa  (%rcx), %xmm0
+; W64-NEXT: pshufd  $1, %xmm0, %xmm1
+; W64-NEXT: addss   %xmm0, %xmm1
+; W64-NEXT: movss   %xmm1, (%rdx)
+; W64-NEXT: ret
+
 ; X32: test1:
 ; X32-NEXT: pshufd	$1, %xmm0, %xmm1
 ; X32-NEXT: addss	%xmm0, %xmm1
@@ -31,6 +39,14 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
 ; X64: test2:
 ; X64-NEXT: addps	%xmm1, %xmm0
 ; X64-NEXT: ret
+
+; W64: test2:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   (%rdx), %xmm0
+; W64-NEXT: ret
+
+; X32: test2:
+; X32:      addps	%xmm1, %xmm0
 }
 
 
@@ -38,17 +54,35 @@ define <2 x float> @test3(<4 x float> %A) nounwind {
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
 	ret <2 x float> %C
-; CHECK: test3:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test3:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test3:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test3:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 define <2 x float> @test4(<2 x float> %A) nounwind {
 	%C = fadd <2 x float> %A, %A
 	ret <2 x float> %C
-; CHECK: test4:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test4:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test4:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test4:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 define <4 x float> @test5(<4 x float> %A) nounwind {
@@ -61,10 +95,21 @@ BB:
 	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 	ret <4 x float> %E
         
-; CHECK: _test5:
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	addps	%xmm0, %xmm0
-; CHECK-NEXT: 	ret
+; X64: test5:
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: addps	%xmm0, %xmm0
+; X64-NEXT: ret
+
+; W64: test5:
+; W64-NEXT: movaps  (%rcx), %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: addps   %xmm0, %xmm0
+; W64-NEXT: ret
+
+; X32: test5:
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: addps	%xmm0, %xmm0
+; X32-NEXT: ret
 }
 
 
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index 95289c9..90d39d0 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core2
-
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=core2
 
 define <8 x i32> @a(<8 x i16> %a) nounwind {
   %c = sext <8 x i16> %a to <8 x i32>
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index 9697f11..66056d0 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86-64 | not grep movsd
-; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: movsd
+; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK-NOT: movsd
 
 define <2 x i64> @test(i64 %i) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 470f676..2ee87fe 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,27 +1,36 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t
-; RUN: grep shufps %t | count 4
-; RUN: grep movaps %t | count 2
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
-; RUN: grep pshufd %t | count 4
-; RUN: not grep shufps %t
-; RUN: not grep mov %t
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
 
 define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
+; sse: movaps
+; sse: shufps
+; sse2: pshufd
+; sse2-NEXT: ret
         %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
         ret <4 x float> %tmp1
 }
 
 define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
+; sse: shufps
+; sse2: pshufd
+; sse2-NEXT: ret
 	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
 	ret <4 x float> %tmp
 }
 
 define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
+; sse: movaps
+; sse: shufps
+; sse2: pshufd
+; sse2-NEXT: ret
 	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 4, i32 4, i32 4, i32 4 >
 	ret <4 x float> %tmp
 }
 
 define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
+; sse: shufps
+; sse2: pshufd
+; sse2-NEXT: ret
 	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 1, i32 3, i32 2, i32 0 >
 	ret <4 x float> %tmp
 }
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index 9c33abb..ebc8c5b 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
-; RUN: llc < %s -march=x86-64 | not grep xor
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK-NOT: xor
+; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK-NOT: xor
 ; PR2108
 
 define <2 x i64> @doload64(i64 %x) nounwind  {
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
new file mode 100644
index 0000000..39e7d71
--- /dev/null
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=sandybridge | FileCheck %s
+
+; Test that we are not lowering uinttofp to scalars
+define <4 x float> @test1(<4 x i32> %A) nounwind {
+; CHECK: test1:
+; CHECK-NOT: cvtsd2ss
+; CHECK: ret
+  %C = uitofp <4 x i32> %A to <4 x float>
+  ret <4 x float> %C
+}
+
diff --git a/test/CodeGen/X86/visibility.ll b/test/CodeGen/X86/visibility.ll
index a8d2870..580c3dc 100644
--- a/test/CodeGen/X86/visibility.ll
+++ b/test/CodeGen/X86/visibility.ll
@@ -1,11 +1,14 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
 
+@zed = external hidden constant i32
+
 define hidden void @foo() nounwind {
 entry:
-  call void @bar()
+  call void @bar(i32* @zed)
   ret void
 }
 
-declare hidden void @bar()
+declare hidden void @bar(i32*)
 
+;CHECK: .hidden	zed
 ;CHECK: .hidden	bar
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index f6c4af0..82c8252 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
 ; PR4891
 
 ; Both loads should happen before either store.
@@ -8,6 +9,11 @@
 ; CHECK: movl  %ecx, (%rdi)
 ; CHECK: movl  %eax, (%rsi)
 
+; WIN64: movl  (%rcx), %eax
+; WIN64: movl  (%rdx), %esi
+; WIN64: movl  %esi, (%rcx)
+; WIN64: movl  %eax, (%rdx)
+
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
   %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
new file mode 100644
index 0000000..cbd38da
--- /dev/null
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; PR8777
+; PR8778
+
+define i64 @foo(i64 %n, i64 %x) nounwind {
+entry:
+
+  %buf0 = alloca i8, i64 4096, align 1
+
+; ___chkstk must adjust %rsp.
+; M64: movq  %rsp, %rbp
+; M64:       $4096, %rax
+; M64: callq ___chkstk
+; M64-NOT:   %rsp
+
+; __chkstk does not adjust %rsp.
+; W64: movq  %rsp, %rbp
+; W64:       $4096, %rax
+; W64: callq __chkstk
+; W64: subq  $4096, %rsp
+
+; Freestanding
+; EFI: movq  %rsp, %rbp
+; EFI:       $[[B0OFS:4096|4104]], %rsp
+; EFI-NOT:   call
+
+  %buf1 = alloca i8, i64 %n, align 1
+
+; M64: leaq  15(%rcx), %rax
+; M64: andq  $-16, %rax
+; M64: callq ___chkstk
+; M64-NOT:   %rsp
+; M64: movq  %rsp, %rax
+
+; W64: leaq  15(%rcx), %rax
+; W64: andq  $-16, %rax
+; W64: callq __chkstk
+; W64: subq  %rax, %rsp
+; W64: movq  %rsp, %rax
+
+; EFI: leaq  15(%rcx), [[R1:%r..]]
+; EFI: andq  $-16, [[R1]]
+; EFI: movq  %rsp, [[R64:%r..]]
+; EFI: subq  [[R1]], [[R64]]
+; EFI: movq  [[R64]], %rsp
+
+  %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
+
+; M64: subq  $48, %rsp
+; M64: leaq  -4096(%rbp), %r9
+; M64: movq  %rax, 32(%rsp)
+; M64: callq bar
+
+; W64: subq  $48, %rsp
+; W64: leaq  -4096(%rbp), %r9
+; W64: movq  %rax, 32(%rsp)
+; W64: callq bar
+
+; EFI: subq  $48, %rsp
+; EFI: leaq  -[[B0OFS]](%rbp), %r9
+; EFI: movq  [[R64]], 32(%rsp)
+; EFI: callq _bar
+
+  ret i64 %r
+
+; M64: movq    %rbp, %rsp
+
+; W64: movq    %rbp, %rsp
+
+}
+
+declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index a451318..efe8bca 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -18,3 +18,36 @@ entry:
 }
 
 declare void @llvm.va_start(i8*) nounwind
+
+; CHECK: f5:
+; CHECK: pushq
+; CHECK: leaq 56(%rsp),
+define i8* @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
+
+; CHECK: f4:
+; CHECK: pushq
+; CHECK: leaq 48(%rsp),
+define i8* @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
+
+; CHECK: f3:
+; CHECK: pushq
+; CHECK: leaq 40(%rsp),
+define i8* @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index 82ce81d..e4e4483 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
 ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-pc-win32-macho | FileCheck %s -check-prefix=LINUX
 
 ; Windows and mingw require a prologue helper routine if more than 4096 bytes area
 ; allocated on the stack.  Windows uses __chkstk and mingw uses __alloca.  __alloca
@@ -16,7 +17,7 @@ entry:
 ; WIN_X32:    calll __chkstk
 ; WIN_X64:    callq __chkstk
 ; MINGW_X32:  calll __alloca
-; MINGW_X64:  callq __chkstk
+; MINGW_X64:  callq ___chkstk
 ; LINUX-NOT:  call __chkstk
   %array4096 = alloca [4096 x i8], align 16       ; <[4096 x i8]*> [#uses=0]
   ret i32 0
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
index b4f1fa6..4aa0ec3 100644
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: shll $3, {{%edi|%ecx}}
 ; PR3829
 ; The generated code should multiply by 3 (sizeof i8*) as an i32,
 ; not as an i64!
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index e61e880..4f1dde3 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -13,6 +13,7 @@ if.end:                                           ; preds = %if.end.i
 ; CHECK: %if.end
 ; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
 ; CHECK-NOT: movl [[REG]], [[REG]]
+; CHECK-NEXT: testl [[REG]], [[REG]]
 ; CHECK-NEXT: xorb
   %tmp138 = select i1 undef, i32 0, i32 %tmp7.i
   %tmp867 = zext i32 %tmp138 to i64
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index bd109b9..cea9e9c 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; <rdar://problem/8006248>
 
diff --git a/test/CodeGen/XCore/events.ll b/test/CodeGen/XCore/events.ll
index 4fc2f26..30a6ec3 100644
--- a/test/CodeGen/XCore/events.ll
+++ b/test/CodeGen/XCore/events.ll
@@ -2,6 +2,7 @@
 
 declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p)
 declare i8* @llvm.xcore.waitevent()
+declare i8* @llvm.xcore.checkevent(i8*)
 declare void @llvm.xcore.clre()
 
 define i32 @f(i8 addrspace(1)* %r) nounwind {
@@ -22,3 +23,22 @@ ret:
   %retval = phi i32 [1, %L1], [2, %L2]
   ret i32 %retval
 }
+
+define i32 @g(i8 addrspace(1)* %r) nounwind {
+; CHECK: g:
+entry:
+; CHECK: clre
+  call void @llvm.xcore.clre()
+  call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L1))
+  %goto_addr = call i8* @llvm.xcore.checkevent(i8 *blockaddress(@f, %L2))
+; CHECK: setsr 1
+; CHECK: clrsr 1
+  indirectbr i8* %goto_addr, [label %L1, label %L2]
+L1:
+  br label %ret
+L2:
+  br label %ret
+ret:
+  %retval = phi i32 [1, %L1], [2, %L2]
+  ret i32 %retval
+}
diff --git a/test/CodeGen/XCore/mul64.ll b/test/CodeGen/XCore/mul64.ll
index 1dc9471..77c6b42 100644
--- a/test/CodeGen/XCore/mul64.ll
+++ b/test/CodeGen/XCore/mul64.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
+; RUN: llc < %s -march=xcore -regalloc=basic | FileCheck %s
 define i64 @umul_lohi(i32 %a, i32 %b) {
 entry:
 	%0 = zext i32 %a to i64
@@ -7,8 +8,8 @@ entry:
 	ret i64 %2
 }
 ; CHECK: umul_lohi:
-; CHECK: ldc r2, 0
-; CHECK-NEXT: lmul r1, r0, r1, r0, r2, r2
+; CHECK: ldc [[REG:r[0-9]+]], 0
+; CHECK-NEXT: lmul r1, r0, r1, r0, [[REG]], [[REG]]
 ; CHECK-NEXT: retsp 0
 
 define i64 @smul_lohi(i32 %a, i32 %b) {
@@ -19,11 +20,11 @@ entry:
 	ret i64 %2
 }
 ; CHECK: smul_lohi:
-; CHECK: ldc r2, 0
-; CHECK-NEXT: mov r3, r2
-; CHECK-NEXT: maccs r2, r3, r1, r0
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: mov r1, r2
+; CHECK: ldc
+; CHECK-NEXT: mov
+; CHECK-NEXT: maccs
+; CHECK-NEXT: mov r0,
+; CHECK-NEXT: mov r1,
 ; CHECK-NEXT: retsp 0
 
 define i64 @mul64(i64 %a, i64 %b) {
@@ -32,11 +33,11 @@ entry:
 	ret i64 %0
 }
 ; CHECK: mul64:
-; CHECK: ldc r11, 0
-; CHECK-NEXT: lmul r11, r4, r0, r2, r11, r11
-; CHECK-NEXT: mul r0, r0, r3
-; CHECK-NEXT: lmul r0, r1, r1, r2, r11, r0
-; CHECK-NEXT: mov r0, r4
+; CHECK: ldc
+; CHECK-NEXT: lmul
+; CHECK-NEXT: mul
+; CHECK-NEXT: lmul
+; CHECK-NEXT: mov r0,
 
 define i64 @mul64_2(i64 %a, i32 %b) {
 entry:
@@ -45,8 +46,8 @@ entry:
 	ret i64 %1
 }
 ; CHECK: mul64_2:
-; CHECK: ldc r3, 0
-; CHECK-NEXT: lmul r3, r0, r0, r2, r3, r3
-; CHECK-NEXT: mul r1, r1, r2
-; CHECK-NEXT: add r1, r3, r1
+; CHECK: ldc
+; CHECK-NEXT: lmul
+; CHECK-NEXT: mul
+; CHECK-NEXT: add r1,
 ; CHECK-NEXT: retsp 0
diff --git a/test/CodeGen/XCore/ps-intrinsics.ll b/test/CodeGen/XCore/ps-intrinsics.ll
new file mode 100644
index 0000000..92b26c7
--- /dev/null
+++ b/test/CodeGen/XCore/ps-intrinsics.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare i32 @llvm.xcore.getps(i32)
+declare void @llvm.xcore.setps(i32, i32)
+
+define i32 @getps(i32 %reg) nounwind {
+; CHECK: getps:
+; CHECK: get r0, ps[r0]
+	%result = call i32 @llvm.xcore.getps(i32 %reg)
+	ret i32 %result
+}
+
+
+define void @setps(i32 %reg, i32 %value) nounwind {
+; CHECK: setps:
+; CHECK: set ps[r0], r1
+	call void @llvm.xcore.setps(i32 %reg, i32 %value)
+	ret void
+}
diff --git a/test/CodeGen/XCore/resources.ll b/test/CodeGen/XCore/resources.ll
index 3389912..bd0492c 100644
--- a/test/CodeGen/XCore/resources.ll
+++ b/test/CodeGen/XCore/resources.ll
@@ -19,6 +19,9 @@ declare void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r)
 declare void @llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value)
 declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p)
 declare void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.setclk.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b)
+declare void @llvm.xcore.setrdy.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b)
+declare void @llvm.xcore.setpsc.p1i8(i8 addrspace(1)* %r, i32 %value)
 
 define i8 addrspace(1)* @getr() {
 ; CHECK: getr:
@@ -174,3 +177,24 @@ define void @eeu(i8 addrspace(1)* %r) {
 	call void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r)
 	ret void
 }
+
+define void @setclk(i8 addrspace(1)* %a, i8 addrspace(1)* %b) {
+; CHECK: setclk
+; CHECK: setclk res[r0], r1
+	call void @llvm.xcore.setclk.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b)
+	ret void
+}
+
+define void @setrdy(i8 addrspace(1)* %a, i8 addrspace(1)* %b) {
+; CHECK: setrdy
+; CHECK: setrdy res[r0], r1
+	call void @llvm.xcore.setrdy.p1i8.p1i8(i8 addrspace(1)* %a, i8 addrspace(1)* %b)
+	ret void
+}
+
+define void @setpsc(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: setpsc
+; CHECK: setpsc res[r0], r1
+	call void @llvm.xcore.setpsc.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
new file mode 100644
index 0000000..3181e96
--- /dev/null
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=xcore
+@size = global i32 0		; <i32*> [#uses=1]
+@g0 = external global i32		; <i32*> [#uses=2]
+@g1 = external global i32		; <i32*> [#uses=2]
+@g2 = external global i32		; <i32*> [#uses=2]
+@g3 = external global i32		; <i32*> [#uses=2]
+@g4 = external global i32		; <i32*> [#uses=2]
+@g5 = external global i32		; <i32*> [#uses=2]
+@g6 = external global i32		; <i32*> [#uses=2]
+@g7 = external global i32		; <i32*> [#uses=2]
+@g8 = external global i32		; <i32*> [#uses=2]
+@g9 = external global i32		; <i32*> [#uses=2]
+@g10 = external global i32		; <i32*> [#uses=2]
+@g11 = external global i32		; <i32*> [#uses=2]
+
+define void @f() nounwind {
+entry:
+	%x = alloca [100 x i32], align 4		; <[100 x i32]*> [#uses=2]
+	%0 = load i32* @size, align 4		; <i32> [#uses=1]
+	%1 = alloca i32, i32 %0, align 4		; <i32*> [#uses=1]
+	%2 = volatile load i32* @g0, align 4		; <i32> [#uses=1]
+	%3 = volatile load i32* @g1, align 4		; <i32> [#uses=1]
+	%4 = volatile load i32* @g2, align 4		; <i32> [#uses=1]
+	%5 = volatile load i32* @g3, align 4		; <i32> [#uses=1]
+	%6 = volatile load i32* @g4, align 4		; <i32> [#uses=1]
+	%7 = volatile load i32* @g5, align 4		; <i32> [#uses=1]
+	%8 = volatile load i32* @g6, align 4		; <i32> [#uses=1]
+	%9 = volatile load i32* @g7, align 4		; <i32> [#uses=1]
+	%10 = volatile load i32* @g8, align 4		; <i32> [#uses=1]
+	%11 = volatile load i32* @g9, align 4		; <i32> [#uses=1]
+	%12 = volatile load i32* @g10, align 4		; <i32> [#uses=1]
+	%13 = volatile load i32* @g11, align 4		; <i32> [#uses=2]
+	%14 = getelementptr [100 x i32]* %x, i32 0, i32 50		; <i32*> [#uses=1]
+	store i32 %13, i32* %14, align 4
+	volatile store i32 %13, i32* @g11, align 4
+	volatile store i32 %12, i32* @g10, align 4
+	volatile store i32 %11, i32* @g9, align 4
+	volatile store i32 %10, i32* @g8, align 4
+	volatile store i32 %9, i32* @g7, align 4
+	volatile store i32 %8, i32* @g6, align 4
+	volatile store i32 %7, i32* @g5, align 4
+	volatile store i32 %6, i32* @g4, align 4
+	volatile store i32 %5, i32* @g3, align 4
+	volatile store i32 %4, i32* @g2, align 4
+	volatile store i32 %3, i32* @g1, align 4
+	volatile store i32 %2, i32* @g0, align 4
+	%x1 = getelementptr [100 x i32]* %x, i32 0, i32 0		; <i32*> [#uses=1]
+	call void @g(i32* %x1, i32* %1) nounwind
+	ret void
+}
+
+declare void @g(i32*, i32*)
diff --git a/test/CodeGen/XCore/sr-intrinsics.ll b/test/CodeGen/XCore/sr-intrinsics.ll
new file mode 100644
index 0000000..e12ed03
--- /dev/null
+++ b/test/CodeGen/XCore/sr-intrinsics.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare void @llvm.xcore.setsr(i32)
+declare void @llvm.xcore.clrsr(i32)
+
+define void @setsr() nounwind {
+; CHECK: setsr:
+; CHECK: setsr 128
+	call void @llvm.xcore.setsr(i32 128)
+	ret void
+}
+
+
+define void @clrsr() nounwind {
+; CHECK: clrsr:
+; CHECK: clrsr 128
+	call void @llvm.xcore.clrsr(i32 128)
+	ret void
+}
diff --git a/test/CodeGen/XCore/threads.ll b/test/CodeGen/XCore/threads.ll
new file mode 100644
index 0000000..a0558e3
--- /dev/null
+++ b/test/CodeGen/XCore/threads.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=xcore < %s | FileCheck %s
+
+declare i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.msync.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.ssync()
+declare void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %r, i8* %value)
+declare void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %r, i8* %value)
+declare void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %r, i8* %value)
+declare void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %r, i8* %value)
+declare void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %r, i8* %value)
+
+define i8 addrspace(1)* @getst(i8 addrspace(1)* %r) {
+; CHECK: getst:
+; CHECK: getst r0, res[r0]
+        %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r)
+        ret i8 addrspace(1)* %result
+}
+
+define void @ssync() {
+; CHECK: ssync:
+; CHECK: ssync
+	call void @llvm.xcore.ssync()
+	ret void
+}
+
+define void @mjoin(i8 addrspace(1)* %r) {
+; CHECK: mjoin:
+; CHECK: mjoin res[r0]
+	call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r)
+	ret void
+}
+
+define void @initsp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK: initsp:
+; CHECK: init t[r0]:sp, r1
+        call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src)
+        ret void
+}
+
+define void @initpc(i8 addrspace(1)* %t, i8* %src) {
+; CHECK: initpc:
+; CHECK: init t[r0]:pc, r1
+        call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src)
+        ret void
+}
+
+define void @initlr(i8 addrspace(1)* %t, i8* %src) {
+; CHECK: initlr:
+; CHECK: init t[r0]:lr, r1
+        call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src)
+        ret void
+}
+
+define void @initcp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK: initcp:
+; CHECK: init t[r0]:cp, r1
+        call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src)
+        ret void
+}
+
+define void @initdp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK: initdp:
+; CHECK: init t[r0]:dp, r1
+        call void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %t, i8* %src)
+        ret void
+}
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
index 18cc45e..4e1aba0 100644
--- a/test/CodeGen/XCore/trampoline.ll
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -5,8 +5,8 @@
 define void @f() nounwind {
 entry:
 ; CHECK: f:
-; CHECK ldap r11, g.1101
-; CHECK stw r11, sp[7]
+; CHECK: ldap r11, g.1101
+; CHECK: stw r11, sp[7]
   %TRAMP.23 = alloca [20 x i8], align 2
   %FRAME.0 = alloca %struct.FRAME.f, align 4
   %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0
diff --git a/test/DebugInfo/2010-04-13-PubType.ll b/test/DebugInfo/2010-04-13-PubType.ll
index 371169f..db7bb0a 100644
--- a/test/DebugInfo/2010-04-13-PubType.ll
+++ b/test/DebugInfo/2010-04-13-PubType.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -asm-verbose < %s > %t
 ; RUN: grep "External Name" %t | grep -v X
 ; RUN: grep "External Name" %t | grep Y | count 1
-; Test to check type with no defintion is listed in pubtypes section.
+; Test to check type with no definition is listed in pubtypes section.
 %struct.X = type opaque
 %struct.Y = type { i32 }
 
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
new file mode 100644
index 0000000..9f592a1
--- /dev/null
+++ b/test/DebugInfo/array.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; Do not emit AT_upper_bound for an unbounded array.
+; radar 9241695
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %a = alloca [0 x i32], align 4
+  store i32 0, i32* %retval
+  call void @llvm.dbg.declare(metadata !{[0 x i32]* %a}, metadata !6), !dbg !11
+  ret i32 0, !dbg !12
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"array.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"array.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129138)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590080, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 589835, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!9 = metadata !{metadata !10}
+;CHECK: DW_TAG_subrange_type
+;CHECK-NEXT: DW_AT_type
+;CHECK-NOT: DW_AT_lower_bound
+;CHECK-NOT: DW_AT_upper_bound
+;CHECK-NEXT: End Of Children Mark
+!10 = metadata !{i32 589857, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!11 = metadata !{i32 4, i32 7, metadata !7, null}
+!12 = metadata !{i32 5, i32 3, metadata !7, null}
diff --git a/test/FrontendAda/real_cst.adb b/test/FrontendAda/real_cst.adb
index c970830..75143710 100644
--- a/test/FrontendAda/real_cst.adb
+++ b/test/FrontendAda/real_cst.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -S -O2 -gnatn %s
+-- RUN: %llvmgcc -S -O2 -gnatn %s -I%p/Support
 package body Real_Cst is
    Cst : constant Float := 0.0;
    procedure Write (Stream : access Ada.Streams.Root_Stream_Type'Class) is
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
deleted file mode 100644
index e1cc81f..0000000
--- a/test/FrontendC++/2009-07-15-LineNumbers.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-// This is a regression test on debug info to make sure that we can
-// print line numbers in asm.
-// RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:    llc --disable-fp-elim -O0 -relocation-model=pic | grep {2009-07-15-LineNumbers.cpp:25$}
-
-#include <stdlib.h>
-
-class DeepStack {
-  int seedVal;
-public:
-  DeepStack(int seed) : seedVal(seed) {}
-
-  int shallowest( int x ) { return shallower(x + 1); }
-  int shallower ( int x ) { return shallow(x + 2); }
-  int shallow   ( int x ) { return deep(x + 3); }
-  int deep      ( int x ) { return deeper(x + 4); }
-  int deeper    ( int x ) { return deepest(x + 6); }
-  int deepest   ( int x ) { return x + 7; }
-
-  int runit() { return shallowest(seedVal); }
-};
-
-int main ( int argc, char** argv) {
-
-  DeepStack DS9( (argc > 1 ? atoi(argv[1]) : 0) );
-  return DS9.runit();
-}
diff --git a/test/FrontendC/2006-05-01-AppleAlignmentPragma.c b/test/FrontendC/2006-05-01-AppleAlignmentPragma.c
index c9050aa..233968b 100644
--- a/test/FrontendC/2006-05-01-AppleAlignmentPragma.c
+++ b/test/FrontendC/2006-05-01-AppleAlignmentPragma.c
@@ -1,7 +1,7 @@
 // RUN: %llvmgcc %s -S -o -
 
 #ifdef __APPLE__
-/* test that X is layed out correctly when this pragma is used. */
+/* test that X is laid out correctly when this pragma is used. */
 #pragma options align=mac68k
 #endif
 
diff --git a/test/FrontendC/2010-07-27-MinNoFoldConst.c b/test/FrontendC/2010-07-27-MinNoFoldConst.c
index 7cd8b4c..ea711e5 100644
--- a/test/FrontendC/2010-07-27-MinNoFoldConst.c
+++ b/test/FrontendC/2010-07-27-MinNoFoldConst.c
@@ -10,7 +10,7 @@ static void bad(unsigned int v1, unsigned int v2) {
 //   MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419)
 //
 // 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips
-// this to 4047041419. This breaks the comparision implicit in the MIN().
+// this to 4047041419. This breaks the comparison implicit in the MIN().
 // Two multiply operations suggests the bad optimization is happening;
 // one multiplication, after the MIN(), is correct.
 // CHECK: mul
diff --git a/test/FrontendC/2011-03-02-UnionInitializer.c b/test/FrontendC/2011-03-02-UnionInitializer.c
new file mode 100644
index 0000000..a5ea75e
--- /dev/null
+++ b/test/FrontendC/2011-03-02-UnionInitializer.c
@@ -0,0 +1,2 @@
+// RUN: %llvmgcc -S %s
+union { int :3; double f; } u17_017 = {17.17};
diff --git a/test/FrontendC/2011-03-08-ZeroFieldUnionInitializer.c b/test/FrontendC/2011-03-08-ZeroFieldUnionInitializer.c
new file mode 100644
index 0000000..1fd8a87
--- /dev/null
+++ b/test/FrontendC/2011-03-08-ZeroFieldUnionInitializer.c
@@ -0,0 +1,7 @@
+// RUN: %llvmgcc -S %s
+typedef struct {
+  union {
+    struct { } __attribute((packed));
+  };
+} fenv_t;
+const fenv_t _FE_DFL_ENV = {{{ 0, 0, 0, 0 }}};
diff --git a/test/FrontendC/2011-03-31-ArrayRefFolding.c b/test/FrontendC/2011-03-31-ArrayRefFolding.c
new file mode 100644
index 0000000..4039279
--- /dev/null
+++ b/test/FrontendC/2011-03-31-ArrayRefFolding.c
@@ -0,0 +1,15 @@
+// RUN: %llvmgcc -S -o - -m32 -Os %s | FileCheck %s
+// PR9571
+
+struct t {
+  int x;
+};
+
+extern struct t *cfun;
+
+int f(void) {
+  if (!(cfun + 0))
+// CHECK: icmp eq %struct.t* %0, null
+    return 0;
+  return cfun->x;
+}
diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c
deleted file mode 100644
index 544c9f3..0000000
--- a/test/FrontendC/cstring-align.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: %llvmgcc %s -S -Os -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s
-
-extern void func(const char *, const char *);
-
-void long_function_name() {
-  func("%s: the function name", __func__);
-}
-
-// CHECK: .align 4
-// CHECK: ___func__.
-// CHECK: .asciz "long_function_name"
diff --git a/test/FrontendC/mmx-inline-asm.c b/test/FrontendC/mmx-inline-asm.c
new file mode 100644
index 0000000..b66137c
--- /dev/null
+++ b/test/FrontendC/mmx-inline-asm.c
@@ -0,0 +1,24 @@
+// RUN: %llvmgcc -mmmx -S -o - %s | FileCheck %s
+// XFAIL: *
+// XTARGET: x86,i386,i686
+// <rdar://problem/9091220>
+#include <mmintrin.h>
+#include <stdint.h>
+
+// CHECK: type { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx }
+
+void foo(__m64 vfill) {
+  __m64 v1, v2, v3, v4, v5, v6, v7;
+
+  __asm__ __volatile__ (
+    "\tmovq  %7, %0\n"
+    "\tmovq  %7, %1\n"
+    "\tmovq  %7, %2\n"
+    "\tmovq  %7, %3\n"
+    "\tmovq  %7, %4\n"
+    "\tmovq  %7, %5\n"
+    "\tmovq  %7, %6"
+    : "=&y" (v1), "=&y" (v2), "=&y" (v3),
+      "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7)
+    : "y" (vfill));
+}
diff --git a/test/FrontendC/vla-3.c b/test/FrontendC/vla-3.c
new file mode 100644
index 0000000..eca9675
--- /dev/null
+++ b/test/FrontendC/vla-3.c
@@ -0,0 +1,11 @@
+// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16"
+
+void adr(char *);
+
+void vlaalign(int size)
+{
+    char __attribute__((aligned(16))) tmp[size+32];
+    char tmp2[size+16];
+
+    adr(tmp);
+}
diff --git a/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m b/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m
new file mode 100644
index 0000000..a5bd2b7
--- /dev/null
+++ b/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m
@@ -0,0 +1,11 @@
+// RUN: %llvmgcc -S -w -m64 -mmacosx-version-min=10.5 %s -o - | \
+// RUN:     llc --disable-fp-elim -o - | FileCheck %s
+// XFAIL: *
+// XTARGET: darwin
+
+@interface Foo
+@end
+Foo *FooName = @"FooBar";
+
+// CHECK:      .section __TEXT,__cstring,cstring_literals
+// CHECK-NEXT: L_.str:
diff --git a/test/FrontendObjC/2011-03-08-IVarLookup.m b/test/FrontendObjC/2011-03-08-IVarLookup.m
new file mode 100644
index 0000000..939f2a7
--- /dev/null
+++ b/test/FrontendObjC/2011-03-08-IVarLookup.m
@@ -0,0 +1,32 @@
+// RUN: %llvmgcc -x objective-c -S -fobjc-abi-version=2 %s -o /dev/null
+// XFAIL: *
+// XTARGET: darwin
+
+typedef unsigned int UInt_t;
+
+@interface A
+{
+@protected
+  UInt_t _f1;
+}
+@end
+
+@interface B : A { }
+@end
+
+@interface A ()
+@property (assign) UInt_t f1;
+@end
+
+@interface B ()
+@property (assign) int x;
+@end
+
+@implementation B
+@synthesize x;
+- (id) init
+{
+  _f1 = 0;
+  return self;
+}
+@end
diff --git a/test/MC/ARM/arm_addrmode2.s b/test/MC/ARM/arm_addrmode2.s
new file mode 100644
index 0000000..ca99233
--- /dev/null
+++ b/test/MC/ARM/arm_addrmode2.s
@@ -0,0 +1,34 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s | FileCheck %s
+
+@ Post-indexed
+@ CHECK: ldrt  r1, [r0], r2 @ encoding: [0x02,0x10,0xb0,0xe6]
+@ CHECK: ldrt  r1, [r0], r2, lsr #3 @ encoding: [0xa2,0x11,0xb0,0xe6]
+@ CHECK: ldrt  r1, [r0], #4 @ encoding: [0x04,0x10,0xb0,0xe4]
+@ CHECK: ldrbt  r1, [r0], r2 @ encoding: [0x02,0x10,0xf0,0xe6]
+@ CHECK: ldrbt  r1, [r0], r2, lsr #3 @ encoding: [0xa2,0x11,0xf0,0xe6]
+@ CHECK: ldrbt  r1, [r0], #4 @ encoding: [0x04,0x10,0xf0,0xe4]
+@ CHECK: strt  r1, [r0], r2 @ encoding: [0x02,0x10,0xa0,0xe6]
+@ CHECK: strt  r1, [r0], r2, lsr #3 @ encoding: [0xa2,0x11,0xa0,0xe6]
+@ CHECK: strt  r1, [r0], #4 @ encoding: [0x04,0x10,0xa0,0xe4]
+@ CHECK: strbt  r1, [r0], r2 @ encoding: [0x02,0x10,0xe0,0xe6]
+@ CHECK: strbt  r1, [r0], r2, lsr #3 @ encoding: [0xa2,0x11,0xe0,0xe6]
+@ CHECK: strbt  r1, [r0], #4 @ encoding: [0x04,0x10,0xe0,0xe4]
+        ldrt  r1, [r0], r2
+        ldrt  r1, [r0], r2, lsr #3
+        ldrt  r1, [r0], #4
+        ldrbt  r1, [r0], r2
+        ldrbt  r1, [r0], r2, lsr #3
+        ldrbt  r1, [r0], #4
+        strt  r1, [r0], r2
+        strt  r1, [r0], r2, lsr #3
+        strt  r1, [r0], #4
+        strbt  r1, [r0], r2
+        strbt  r1, [r0], r2, lsr #3
+        strbt  r1, [r0], #4
+
+@ Pre-indexed
+@ CHECK: ldr  r1, [r0, r2, lsr #3]! @ encoding: [0xa2,0x11,0xb0,0xe7]
+@ CHECK: ldrb  r1, [r0, r2, lsr #3]! @ encoding: [0xa2,0x11,0xf0,0xe7]
+        ldr  r1, [r0, r2, lsr #3]!
+        ldrb  r1, [r0, r2, lsr #3]!
+
diff --git a/test/MC/ARM/arm_addrmode3.s b/test/MC/ARM/arm_addrmode3.s
new file mode 100644
index 0000000..0b9639e
--- /dev/null
+++ b/test/MC/ARM/arm_addrmode3.s
@@ -0,0 +1,18 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s | FileCheck %s
+
+@ CHECK: ldrsbt  r1, [r0], +r2 @ encoding: [0xd2,0x10,0xb0,0xe0]
+@ CHECK: ldrsbt  r1, [r0], #4 @ encoding: [0xd4,0x10,0xf0,0xe0]
+@ CHECK: ldrsht  r1, [r0], +r2 @ encoding: [0xf2,0x10,0xb0,0xe0]
+@ CHECK: ldrsht  r1, [r0], #4 @ encoding: [0xf4,0x10,0xf0,0xe0]
+@ CHECK: ldrht  r1, [r0], +r2 @ encoding: [0xb2,0x10,0xb0,0xe0]
+@ CHECK: ldrht  r1, [r0], #4 @ encoding: [0xb4,0x10,0xf0,0xe0]
+@ CHECK: strht  r1, [r0], +r2 @ encoding: [0xb2,0x10,0xa0,0xe0]
+@ CHECK: strht  r1, [r0], #4 @ encoding: [0xb4,0x10,0xe0,0xe0]
+        ldrsbt  r1, [r0], r2
+        ldrsbt  r1, [r0], #4
+        ldrsht  r1, [r0], r2
+        ldrsht  r1, [r0], #4
+        ldrht  r1, [r0], r2
+        ldrht  r1, [r0], #4
+        strht  r1, [r0], r2
+        strht  r1, [r0], #4
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index fbec789..50a2b70 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -282,3 +282,30 @@
 @ CHECK: msr  cpsr_fsxc, r0 @ encoding: [0x00,0xf0,0x2f,0xe1]
         msr  cpsr_fsxc, r0
 
+@ CHECK: add	r1, r2, r3, lsl r4      @ encoding: [0x13,0x14,0x82,0xe0]
+  add r1, r2, r3, lsl r4
+
+@ CHECK: strexb  r0, r1, [r2] @ encoding: [0x91,0x0f,0xc2,0xe1]
+        strexb  r0, r1, [r2]
+
+@ CHECK: strexh  r0, r1, [r2] @ encoding: [0x91,0x0f,0xe2,0xe1]
+        strexh  r0, r1, [r2]
+
+@ CHECK: strex  r0, r1, [r2] @ encoding: [0x91,0x0f,0x82,0xe1]
+        strex  r0, r1, [r2]
+
+@ CHECK: strexd  r0, r2, r3, [r1] @ encoding: [0x92,0x0f,0xa1,0xe1]
+        strexd  r0, r2, r3, [r1]
+
+@ CHECK: ldrexb  r0, [r0] @ encoding: [0x9f,0x0f,0xd0,0xe1]
+        ldrexb  r0, [r0]
+
+@ CHECK: ldrexh  r0, [r0] @ encoding: [0x9f,0x0f,0xf0,0xe1]
+        ldrexh  r0, [r0]
+
+@ CHECK: ldrex  r0, [r0] @ encoding: [0x9f,0x0f,0x90,0xe1]
+        ldrex  r0, [r0]
+
+@ CHECK: ldrexd  r0, r1, [r0] @ encoding: [0x9f,0x0f,0xb0,0xe1]
+        ldrexd  r0, r1, [r0]
+
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 6b83c95..eb6e243 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -60,12 +60,11 @@ bb3:                                              ; preds = %bb, %entry
 
 declare void @exit(i32) noreturn nounwind
 
-
-;; OBJ:         Symbol 0x00000002
-;; OBJ-NEXT:    '_MergedGlobals'
-;; OBJ-NEXT:    'st_value', 0x00000010
-
 ;; OBJ:          Relocation 0x00000001
 ;; OBJ-NEXT:     'r_offset', 
 ;; OBJ-NEXT:     'r_sym', 0x00000002
 ;; OBJ-NEXT:     'r_type', 0x0000002b
+
+;; OBJ:         Symbol 0x00000002
+;; OBJ-NEXT:    '_MergedGlobals'
+;; OBJ-NEXT:    'st_value', 0x00000010
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index 132a477..091e89f 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -41,11 +41,10 @@ declare i32 @write(...)
 
 declare void @exit(i32) noreturn nounwind
 
-
-;; OBJ:          Symbol 0x00000002
-;; OBJ-NEXT:    '.L.str'
-
 ;; OBJ:        Relocation 0x00000000
 ;; OBJ-NEXT:    'r_offset', 
 ;; OBJ-NEXT:    'r_sym', 0x00000002
 ;; OBJ-NEXT:    'r_type', 0x0000002b
+
+;; OBJ:          Symbol 0x00000002
+;; OBJ-NEXT:    '.L.str'
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index e052f39..91dba55 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -88,11 +88,10 @@ entry:
 
 declare void @exit(i32) noreturn nounwind
 
-
-;; OBJ:      Symbol 0x0000000c
-;; OBJ-NEXT:    'vtable'
-
 ;; OBJ:           Relocation 0x00000001
 ;; OBJ-NEXT:     'r_offset', 
 ;; OBJ-NEXT:     'r_sym', 0x0000000c
 ;; OBJ-NEXT:     'r_type', 0x0000002b
+
+;; OBJ:      Symbol 0x0000000c
+;; OBJ-NEXT:    'vtable'
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
index 4b4fa08..a7a1b83 100644
--- a/test/MC/ARM/neon-shift-encoding.s
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -1,160 +1,237 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vshl.u8	d16, d17, d16           @ encoding: [0xa1,0x04,0x40,0xf3]
+_foo:
+@ CHECK: vshl.u8	d16, d17, d16  @ encoding: [0xa1,0x04,0x40,0xf3]
 	vshl.u8	d16, d17, d16
-@ CHECK: vshl.u16	d16, d17, d16   @ encoding: [0xa1,0x04,0x50,0xf3]
+@ CHECK: vshl.u16	d16, d17, d16  @ encoding: [0xa1,0x04,0x50,0xf3]
 	vshl.u16	d16, d17, d16
-@ CHECK: vshl.u32	d16, d17, d16   @ encoding: [0xa1,0x04,0x60,0xf3]
+@ CHECK: vshl.u32	d16, d17, d16  @ encoding: [0xa1,0x04,0x60,0xf3]
 	vshl.u32	d16, d17, d16
-@ CHECK: vshl.u64	d16, d17, d16   @ encoding: [0xa1,0x04,0x70,0xf3]
+@ CHECK: vshl.u64	d16, d17, d16  @ encoding: [0xa1,0x04,0x70,0xf3]
 	vshl.u64	d16, d17, d16
-@ CHECK: vshl.i8	d16, d16, #7            @ encoding: [0x30,0x05,0xcf,0xf2]
+@ CHECK: vshl.i8	d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf2]
 	vshl.i8	d16, d16, #7
-@ CHECK: vshl.i16	d16, d16, #15   @ encoding: [0x30,0x05,0xdf,0xf2]
+@ CHECK: vshl.i16	d16, d16, #15  @ encoding: [0x30,0x05,0xdf,0xf2]
 	vshl.i16	d16, d16, #15
-@ CHECK: vshl.i32	d16, d16, #31   @ encoding: [0x30,0x05,0xff,0xf2]
+@ CHECK: vshl.i32	d16, d16, #31  @ encoding: [0x30,0x05,0xff,0xf2]
 	vshl.i32	d16, d16, #31
-@ CHECK: vshl.i64	d16, d16, #63   @ encoding: [0xb0,0x05,0xff,0xf2]
+@ CHECK: vshl.i64	d16, d16, #63  @ encoding: [0xb0,0x05,0xff,0xf2]
 	vshl.i64	d16, d16, #63
-@ CHECK: vshl.u8	q8, q9, q8              @ encoding: [0xe2,0x04,0x40,0xf3]
+@ CHECK: vshl.u8	q8, q9, q8  @ encoding: [0xe2,0x04,0x40,0xf3]
 	vshl.u8	q8, q9, q8
-@ CHECK: vshl.u16	q8, q9, q8      @ encoding: [0xe2,0x04,0x50,0xf3]
+@ CHECK: vshl.u16	q8, q9, q8  @ encoding: [0xe2,0x04,0x50,0xf3]
 	vshl.u16	q8, q9, q8
-@ CHECK: vshl.u32	q8, q9, q8      @ encoding: [0xe2,0x04,0x60,0xf3]
+@ CHECK: vshl.u32	q8, q9, q8  @ encoding: [0xe2,0x04,0x60,0xf3]
 	vshl.u32	q8, q9, q8
-@ CHECK: vshl.u64	q8, q9, q8      @ encoding: [0xe2,0x04,0x70,0xf3]
+@ CHECK: vshl.u64	q8, q9, q8  @ encoding: [0xe2,0x04,0x70,0xf3]
 	vshl.u64	q8, q9, q8
-@ CHECK: vshl.i8	q8, q8, #7              @ encoding: [0x70,0x05,0xcf,0xf2]
+@ CHECK: vshl.i8	q8, q8, #7  @ encoding: [0x70,0x05,0xcf,0xf2]
 	vshl.i8	q8, q8, #7
-@ CHECK: vshl.i16	q8, q8, #15     @ encoding: [0x70,0x05,0xdf,0xf2]
+@ CHECK: vshl.i16	q8, q8, #15  @ encoding: [0x70,0x05,0xdf,0xf2]
 	vshl.i16	q8, q8, #15
-@ CHECK: vshl.i32	q8, q8, #31     @ encoding: [0x70,0x05,0xff,0xf2]
+@ CHECK: vshl.i32	q8, q8, #31  @ encoding: [0x70,0x05,0xff,0xf2]
 	vshl.i32	q8, q8, #31
-@ CHECK: vshl.i64	q8, q8, #63     @ encoding: [0xf0,0x05,0xff,0xf2]
+@ CHECK: vshl.i64	q8, q8, #63  @ encoding: [0xf0,0x05,0xff,0xf2]
 	vshl.i64	q8, q8, #63
-@ CHECK: vshr.u8	d16, d16, #8            @ encoding: [0x30,0x00,0xc8,0xf3]
-	vshr.u8	d16, d16, #8
-@ CHECK: vshr.u16	d16, d16, #16   @ encoding: [0x30,0x00,0xd0,0xf3]
-	vshr.u16	d16, d16, #16
-@ CHECK: vshr.u32	d16, d16, #32   @ encoding: [0x30,0x00,0xe0,0xf3]
-	vshr.u32	d16, d16, #32
-@ CHECK: vshr.u64	d16, d16, #64   @ encoding: [0xb0,0x00,0xc0,0xf3]
-	vshr.u64	d16, d16, #64
-@ CHECK: vshr.u8	q8, q8, #8              @ encoding: [0x70,0x00,0xc8,0xf3]
-	vshr.u8	q8, q8, #8
-@ CHECK: vshr.u16	q8, q8, #16     @ encoding: [0x70,0x00,0xd0,0xf3]
-	vshr.u16	q8, q8, #16
-@ CHECK: vshr.u32	q8, q8, #32     @ encoding: [0x70,0x00,0xe0,0xf3]
-	vshr.u32	q8, q8, #32
-@ CHECK: vshr.u64	q8, q8, #64     @ encoding: [0xf0,0x00,0xc0,0xf3]
-	vshr.u64	q8, q8, #64
-@ CHECK: vshr.s8	d16, d16, #8            @ encoding: [0x30,0x00,0xc8,0xf2]
-	vshr.s8	d16, d16, #8
-@ CHECK: vshr.s16	d16, d16, #16   @ encoding: [0x30,0x00,0xd0,0xf2]
-	vshr.s16	d16, d16, #16
-@ CHECK: vshr.s32	d16, d16, #32   @ encoding: [0x30,0x00,0xe0,0xf2]
-	vshr.s32	d16, d16, #32
-@ CHECK: vshr.s64	d16, d16, #64   @ encoding: [0xb0,0x00,0xc0,0xf2]
-	vshr.s64	d16, d16, #64
-@ CHECK: vshr.s8	q8, q8, #8              @ encoding: [0x70,0x00,0xc8,0xf2]
-	vshr.s8	q8, q8, #8
-@ CHECK: vshr.s16	q8, q8, #16     @ encoding: [0x70,0x00,0xd0,0xf2]
-	vshr.s16	q8, q8, #16
-@ CHECK: vshr.s32	q8, q8, #32     @ encoding: [0x70,0x00,0xe0,0xf2
-	vshr.s32	q8, q8, #32
-@ CHECK: vshr.s64	q8, q8, #64     @ encoding: [0xf0,0x00,0xc0,0xf2]
-	vshr.s64	q8, q8, #64
-@ CHECK: vshll.s8	q8, d16, #7     @ encoding: [0x30,0x0a,0xcf,0xf2]
+@ CHECK: vshr.u8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf3]
+	vshr.u8	d16, d16, #7
+@ CHECK: vshr.u16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf3]
+	vshr.u16	d16, d16, #15
+@ CHECK: vshr.u32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf3]
+	vshr.u32	d16, d16, #31
+@ CHECK: vshr.u64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf3]
+	vshr.u64	d16, d16, #63
+@ CHECK: vshr.u8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf3]
+	vshr.u8	q8, q8, #7
+@ CHECK: vshr.u16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf3]
+	vshr.u16	q8, q8, #15
+@ CHECK: vshr.u32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf3]
+	vshr.u32	q8, q8, #31
+@ CHECK: vshr.u64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf3]
+	vshr.u64	q8, q8, #63
+@ CHECK: vshr.s8	d16, d16, #7  @ encoding: [0x30,0x00,0xc9,0xf2]
+	vshr.s8	d16, d16, #7
+@ CHECK: vshr.s16	d16, d16, #15  @ encoding: [0x30,0x00,0xd1,0xf2]
+	vshr.s16	d16, d16, #15
+@ CHECK: vshr.s32	d16, d16, #31  @ encoding: [0x30,0x00,0xe1,0xf2]
+	vshr.s32	d16, d16, #31
+@ CHECK: vshr.s64	d16, d16, #63  @ encoding: [0xb0,0x00,0xc1,0xf2]
+	vshr.s64	d16, d16, #63
+@ CHECK: vshr.s8	q8, q8, #7  @ encoding: [0x70,0x00,0xc9,0xf2]
+	vshr.s8	q8, q8, #7
+@ CHECK: vshr.s16	q8, q8, #15  @ encoding: [0x70,0x00,0xd1,0xf2]
+	vshr.s16	q8, q8, #15
+@ CHECK: vshr.s32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf2]
+	vshr.s32	q8, q8, #31
+@ CHECK: vshr.s64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf2]
+	vshr.s64	q8, q8, #63
+@ CHECK: vsra.u8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf3]
+	vsra.u8   d16, d16, #7
+@ CHECK: vsra.u16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf3]
+	vsra.u16  d16, d16, #15
+@ CHECK: vsra.u32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf3]
+	vsra.u32  d16, d16, #31
+@ CHECK: vsra.u64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf3]
+	vsra.u64  d16, d16, #63
+@ CHECK: vsra.u8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf3]
+	vsra.u8   q8, q8, #7
+@ CHECK: vsra.u16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf3]
+	vsra.u16  q8, q8, #15
+@ CHECK: vsra.u32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf3]
+	vsra.u32  q8, q8, #31
+@ CHECK: vsra.u64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf3]
+	vsra.u64  q8, q8, #63
+@ CHECK: vsra.s8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf2]
+	vsra.s8   d16, d16, #7
+@ CHECK: vsra.s16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf2]
+	vsra.s16  d16, d16, #15
+@ CHECK: vsra.s32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf2]
+	vsra.s32  d16, d16, #31
+@ CHECK: vsra.s64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf2]
+	vsra.s64  d16, d16, #63
+@ CHECK: vsra.s8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf2]
+	vsra.s8   q8, q8, #7
+@ CHECK: vsra.s16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf2]
+	vsra.s16  q8, q8, #15
+@ CHECK: vsra.s32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf2]
+	vsra.s32  q8, q8, #31
+@ CHECK: vsra.s64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf2]
+	vsra.s64  q8, q8, #63
+@ CHECK: vsri.8   d16, d16, #7  @ encoding: [0x30,0x04,0xc9,0xf3]
+	vsri.8   d16, d16, #7
+@ CHECK: vsri.16  d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3]
+	vsri.16  d16, d16, #15
+@ CHECK: vsri.32  d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3]
+	vsri.32  d16, d16, #31
+@ CHECK: vsri.64  d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3]
+	vsri.64  d16, d16, #63
+@ CHECK: vsri.8   q8, q8, #7    @ encoding: [0x70,0x04,0xc9,0xf3]
+	vsri.8   q8, q8, #7
+@ CHECK: vsri.16  q8, q8, #15   @ encoding: [0x70,0x04,0xd1,0xf3]
+	vsri.16  q8, q8, #15
+@ CHECK: vsri.32  q8, q8, #31   @ encoding: [0x70,0x04,0xe1,0xf3]
+	vsri.32  q8, q8, #31
+@ CHECK: vsri.64  q8, q8, #63   @ encoding: [0xf0,0x04,0xc1,0xf3]
+	vsri.64  q8, q8, #63
+@ CHECK: vsli.8   d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf3]
+	vsli.8   d16, d16, #7
+@ CHECK: vsli.16  d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3]
+	vsli.16  d16, d16, #15
+@ CHECK: vsli.32  d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3]
+	vsli.32  d16, d16, #31
+@ CHECK: vsli.64  d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3]
+	vsli.64  d16, d16, #63
+@ CHECK: vsli.8   q8, q8, #7    @ encoding: [0x70,0x05,0xcf,0xf3]
+	vsli.8   q8, q8, #7
+@ CHECK: vsli.16  q8, q8, #15   @ encoding: [0x70,0x05,0xdf,0xf3]
+	vsli.16  q8, q8, #15
+@ CHECK: vsli.32  q8, q8, #31   @ encoding: [0x70,0x05,0xff,0xf3]
+	vsli.32  q8, q8, #31
+@ CHECK: vsli.64  q8, q8, #63   @ encoding: [0xf0,0x05,0xff,0xf3]
+	vsli.64  q8, q8, #63
+@ CHECK: vshll.s8	q8, d16, #7  @ encoding: [0x30,0x0a,0xcf,0xf2]
 	vshll.s8	q8, d16, #7
-@ CHECK: vshll.s16	q8, d16, #15    @ encoding: [0x30,0x0a,0xdf,0xf2]
+@ CHECK: vshll.s16	q8, d16, #15  @ encoding: [0x30,0x0a,0xdf,0xf2]
 	vshll.s16	q8, d16, #15
-@ CHECK: vshll.s32	q8, d16, #31    @ encoding: [0x30,0x0a,0xff,0xf2]
+@ CHECK: vshll.s32	q8, d16, #31  @ encoding: [0x30,0x0a,0xff,0xf2]
 	vshll.s32	q8, d16, #31
-@ CHECK: vshll.u8	q8, d16, #7     @ encoding: [0x30,0x0a,0xcf,0xf3]
+@ CHECK: vshll.u8	q8, d16, #7  @ encoding: [0x30,0x0a,0xcf,0xf3]
 	vshll.u8	q8, d16, #7
-@ CHECK: vshll.u16	q8, d16, #15    @ encoding: [0x30,0x0a,0xdf,0xf3]
+@ CHECK: vshll.u16	q8, d16, #15  @ encoding: [0x30,0x0a,0xdf,0xf3]
 	vshll.u16	q8, d16, #15
-@ CHECK: vshll.u32	q8, d16, #31    @ encoding: [0x30,0x0a,0xff,0xf3]
+@ CHECK: vshll.u32	q8, d16, #31  @ encoding: [0x30,0x0a,0xff,0xf3]
 	vshll.u32	q8, d16, #31
-@ CHECK: vshll.i8	q8, d16, #8     @ encoding: [0x20,0x03,0xf2,0xf3]
+@ CHECK: vshll.i8	q8, d16, #8  @ encoding: [0x20,0x03,0xf2,0xf3]
 	vshll.i8	q8, d16, #8
-@ CHECK: vshll.i16	q8, d16, #16    @ encoding: [0x20,0x03,0xf6,0xf3]
+@ CHECK: vshll.i16	q8, d16, #16  @ encoding: [0x20,0x03,0xf6,0xf3]
 	vshll.i16	q8, d16, #16
-@ CHECK: vshll.i32	q8, d16, #32    @ encoding: [0x20,0x03,0xfa,0xf3]
+@ CHECK: vshll.i32	q8, d16, #32  @ encoding: [0x20,0x03,0xfa,0xf3]
 	vshll.i32	q8, d16, #32
-@ CHECK: vshrn.i16	d16, q8, #8     @ encoding: [0x30,0x08,0xc8,0xf2]
+@ CHECK: vshrn.i16	d16, q8, #8  @ encoding: [0x30,0x08,0xc8,0xf2]
 	vshrn.i16	d16, q8, #8
-@ CHECK: vshrn.i32	d16, q8, #16    @ encoding: [0x30,0x08,0xd0,0xf2]
+@ CHECK: vshrn.i32	d16, q8, #16  @ encoding: [0x30,0x08,0xd0,0xf2]
 	vshrn.i32	d16, q8, #16
-@ CHECK: vshrn.i64	d16, q8, #32    @ encoding: [0x30,0x08,0xe0,0xf2]
+@ CHECK: vshrn.i64	d16, q8, #32  @ encoding: [0x30,0x08,0xe0,0xf2]
 	vshrn.i64	d16, q8, #32
-@ CHECK: vrshl.s8	d16, d17, d16   @ encoding: [0xa1,0x05,0x40,0xf2]
+@ CHECK: vrshl.s8	d16, d17, d16  @ encoding: [0xa1,0x05,0x40,0xf2]
 	vrshl.s8	d16, d17, d16
-@ CHECK: vrshl.s16	d16, d17, d16   @ encoding: [0xa1,0x05,0x50,0xf2]
+@ CHECK: vrshl.s16	d16, d17, d16  @ encoding: [0xa1,0x05,0x50,0xf2]
 	vrshl.s16	d16, d17, d16
-@ CHECK: vrshl.s32	d16, d17, d16   @ encoding: [0xa1,0x05,0x60,0xf2]
+@ CHECK: vrshl.s32	d16, d17, d16  @ encoding: [0xa1,0x05,0x60,0xf2]
 	vrshl.s32	d16, d17, d16
-@ CHECK: vrshl.s64	d16, d17, d16   @ encoding: [0xa1,0x05,0x70,0
+@ CHECK: vrshl.s64	d16, d17, d16  @ encoding: [0xa1,0x05,0x70,0xf2]
 	vrshl.s64	d16, d17, d16
-@ CHECK: vrshl.u8	d16, d17, d16   @ encoding: [0xa1,0x05,0x40,0xf3]
+@ CHECK: vrshl.u8	d16, d17, d16  @ encoding: [0xa1,0x05,0x40,0xf3]
 	vrshl.u8	d16, d17, d16
-@ CHECK: vrshl.u16	d16, d17, d16   @ encoding: [0xa1,0x05,0x50,0xf3]
+@ CHECK: vrshl.u16	d16, d17, d16  @ encoding: [0xa1,0x05,0x50,0xf3]
 	vrshl.u16	d16, d17, d16
-@ CHECK: vrshl.u32	d16, d17, d16   @ encoding: [0xa1,0x05,0x60,0xf3]
+@ CHECK: vrshl.u32	d16, d17, d16  @ encoding: [0xa1,0x05,0x60,0xf3]
 	vrshl.u32	d16, d17, d16
-@ CHECK: vrshl.u64	d16, d17, d16   @ encoding: [0xa1,0x05,0x70,0xf3]
+@ CHECK: vrshl.u64	d16, d17, d16  @ encoding: [0xa1,0x05,0x70,0xf3]
 	vrshl.u64	d16, d17, d16
-@ CHECK: vrshl.s8	q8, q9, q8      @ encoding: [0xe2,0x05,0x40,0xf2]
+@ CHECK: vrshl.s8	q8, q9, q8  @ encoding: [0xe2,0x05,0x40,0xf2]
 	vrshl.s8	q8, q9, q8
-@ CHECK: vrshl.s16	q8, q9, q8      @ encoding: [0xe2,0x05,0x50,0xf2]
+@ CHECK: vrshl.s16	q8, q9, q8  @ encoding: [0xe2,0x05,0x50,0xf2]
 	vrshl.s16	q8, q9, q8
-@ CHECK: vrshl.s32	q8, q9, q8      @ encoding: [0xe2,0x05,0x60,0xf2]
+@ CHECK: vrshl.s32	q8, q9, q8  @ encoding: [0xe2,0x05,0x60,0xf2]
 	vrshl.s32	q8, q9, q8
-@ CHECK: vrshl.s64	q8, q9, q8      @ encoding: [0xe2,0x05,0x70,0xf2]
+@ CHECK: vrshl.s64	q8, q9, q8  @ encoding: [0xe2,0x05,0x70,0xf2]
 	vrshl.s64	q8, q9, q8
-@ CHECK: vrshl.u8	q8, q9, q8      @ encoding: [0xe2,0x05,0x40,0xf3]
+@ CHECK: vrshl.u8	q8, q9, q8  @ encoding: [0xe2,0x05,0x40,0xf3]
 	vrshl.u8	q8, q9, q8
-@ CHECK: vrshl.u16	q8, q9, q8      @ encoding: [0xe2,0x05,0x50,0xf3]
+@ CHECK: vrshl.u16	q8, q9, q8  @ encoding: [0xe2,0x05,0x50,0xf3]
 	vrshl.u16	q8, q9, q8
-@ CHECK: vrshl.u32	q8, q9, q8      @ encoding: [0xe2,0x05,0x60,0xf3]
+@ CHECK: vrshl.u32	q8, q9, q8  @ encoding: [0xe2,0x05,0x60,0xf3]
 	vrshl.u32	q8, q9, q8
-@ CHECK: vrshl.u64	q8, q9, q8      @ encoding: [0xe2,0x05,0x70,0xf3]
+@ CHECK: vrshl.u64	q8, q9, q8  @ encoding: [0xe2,0x05,0x70,0xf3]
 	vrshl.u64	q8, q9, q8
-@ CHECK: vrshr.s8	d16, d16, #8    @ encoding: [0x30,0x02,0xc8,0xf2]
+@ CHECK: vrshr.s8	d16, d16, #8  @ encoding: [0x30,0x02,0xc8,0xf2]
 	vrshr.s8	d16, d16, #8
-@ CHECK: vrshr.s16	d16, d16, #16   @ encoding: [0x30,0x02,0xd0,0xf2]
+@ CHECK: vrshr.s16	d16, d16, #16  @ encoding: [0x30,0x02,0xd0,0xf2]
 	vrshr.s16	d16, d16, #16
-@ CHECK: vrshr.s32	d16, d16, #32   @ encoding: [0x30,0x02,0xe0,0xf2]
+@ CHECK: vrshr.s32	d16, d16, #32  @ encoding: [0x30,0x02,0xe0,0xf2]
 	vrshr.s32	d16, d16, #32
-@ CHECK: vrshr.s64	d16, d16, #64   @ encoding: [0xb0,0x02,0xc0,0xf2]
+@ CHECK: vrshr.s64	d16, d16, #64  @ encoding: [0xb0,0x02,0xc0,0xf2]
 	vrshr.s64	d16, d16, #64
-@ CHECK: vrshr.u8	d16, d16, #8    @ encoding: [0x30,0x02,0xc8,0xf3]
+@ CHECK: vrshr.u8	d16, d16, #8  @ encoding: [0x30,0x02,0xc8,0xf3]
 	vrshr.u8	d16, d16, #8
-@ CHECK: vrshr.u16	d16, d16, #16   @ encoding: [0x30,0x02,0xd0,0xf3]
+@ CHECK: vrshr.u16	d16, d16, #16  @ encoding: [0x30,0x02,0xd0,0xf3]
 	vrshr.u16	d16, d16, #16
-@ CHECK: vrshr.u32	d16, d16, #32   @ encoding: [0x30,0x02,0xe0,0xf3]
+@ CHECK: vrshr.u32	d16, d16, #32  @ encoding: [0x30,0x02,0xe0,0xf3]
 	vrshr.u32	d16, d16, #32
-@ CHECK: vrshr.u64	d16, d16, #64   @ encoding: [0xb0,0x02,0xc0,0xf3]
+@ CHECK: vrshr.u64	d16, d16, #64  @ encoding: [0xb0,0x02,0xc0,0xf3]
 	vrshr.u64	d16, d16, #64
-@ CHECK: vrshr.s8	q8, q8, #8      @ encoding: [0x70,0x02,0xc8,0xf2]
+@ CHECK: vrshr.s8	q8, q8, #8  @ encoding: [0x70,0x02,0xc8,0xf2]
 	vrshr.s8	q8, q8, #8
-@ CHECK: vrshr.s16	q8, q8, #16     @ encoding: [0x70,0x02,0xd0,0xf2]
+@ CHECK: vrshr.s16	q8, q8, #16  @ encoding: [0x70,0x02,0xd0,0xf2]
 	vrshr.s16	q8, q8, #16
-@ CHECK: vrshr.s32	q8, q8, #32     @ encoding: [0x70,0x02,0xe0,0xf2]
+@ CHECK: vrshr.s32	q8, q8, #32  @ encoding: [0x70,0x02,0xe0,0xf2]
 	vrshr.s32	q8, q8, #32
-@ CHECK: vrshr.s64	q8, q8, #64     @ encoding: [0xf0,0x02,0xc0,0xf2]
+@ CHECK: vrshr.s64	q8, q8, #64  @ encoding: [0xf0,0x02,0xc0,0xf2]
 	vrshr.s64	q8, q8, #64
-@ CHECK: vrshr.u8	q8, q8, #8      @ encoding: [0x70,0x02,0xc8,0xf3]
+@ CHECK: vrshr.u8	q8, q8, #8  @ encoding: [0x70,0x02,0xc8,0xf3]
 	vrshr.u8	q8, q8, #8
-@ CHECK: vrshr.u16	q8, q8, #16     @ encoding: [0x70,0x02,0xd0,0xf3]
+@ CHECK: vrshr.u16	q8, q8, #16  @ encoding: [0x70,0x02,0xd0,0xf3]
 	vrshr.u16	q8, q8, #16
-@ CHECK: vrshr.u32	q8, q8, #32     @ encoding: [0x70,0x02,0xe0,0xf3]
+@ CHECK: vrshr.u32	q8, q8, #32  @ encoding: [0x70,0x02,0xe0,0xf3]
 	vrshr.u32	q8, q8, #32
-@ CHECK: vrshr.u64	q8, q8, #64     @ encoding: [0xf0,0x02,0xc0,0xf3]
+@ CHECK: vrshr.u64	q8, q8, #64  @ encoding: [0xf0,0x02,0xc0,0xf3]
 	vrshr.u64	q8, q8, #64
-@ CHECK: vrshrn.i16	d16, q8, #8     @ encoding: [0x70,0x08,0xc8,0xf2]
+@ CHECK: vrshrn.i16	d16, q8, #8  @ encoding: [0x70,0x08,0xc8,0xf2]
 	vrshrn.i16	d16, q8, #8
-@ CHECK: vrshrn.i32	d16, q8, #16    @ encoding: [0x70,0x08,0xd0,0xf2]
+@ CHECK: vrshrn.i32	d16, q8, #16  @ encoding: [0x70,0x08,0xd0,0xf2]
 	vrshrn.i32	d16, q8, #16
-@ CHECK: vrshrn.i64	d16, q8, #32    @ encoding: [0x70,0x08,0xe0,0xf2]
+@ CHECK: vrshrn.i64	d16, q8, #32  @ encoding: [0x70,0x08,0xe0,0xf2]
 	vrshrn.i64	d16, q8, #32
+@ CHECK: vqrshrn.s16	d16, q8, #4  @ encoding: [0x70,0x09,0xcc,0xf2]
+	vqrshrn.s16	d16, q8, #4
+@ CHECK: vqrshrn.s32	d16, q8, #13  @ encoding: [0x70,0x09,0xd3,0xf2]
+	vqrshrn.s32	d16, q8, #13
+@ CHECK: vqrshrn.s64	d16, q8, #13  @ encoding: [0x70,0x09,0xf3,0xf2]
+	vqrshrn.s64	d16, q8, #13
+@ CHECK: vqrshrn.u16	d16, q8, #4  @ encoding: [0x70,0x09,0xcc,0xf3]
+	vqrshrn.u16	d16, q8, #4
+@ CHECK: vqrshrn.u32	d16, q8, #13  @ encoding: [0x70,0x09,0xd3,0xf3]
+	vqrshrn.u32	d16, q8, #13
+@ CHECK: vqrshrn.u64	d16, q8, #13  @ encoding: [0x70,0x09,0xf3,0xf3]
+	vqrshrn.u64	d16, q8, #13
diff --git a/test/MC/ARM/simple-encoding.ll b/test/MC/ARM/simple-encoding.ll
index 0877e8e..7b581b3 100644
--- a/test/MC/ARM/simple-encoding.ll
+++ b/test/MC/ARM/simple-encoding.ll
@@ -1,4 +1,4 @@
-;RUN: llc -mtriple=armv7-apple-darwin -show-mc-encoding < %s | FileCheck %s
+;RUN: llc -mtriple=armv7-apple-darwin -show-mc-encoding -disable-cgp-branch-opts < %s | FileCheck %s
 
 
 ;FIXME: Once the ARM integrated assembler is up and going, these sorts of tests
@@ -39,7 +39,7 @@ define i32 @f3(i32 %a, i32 %b) {
 
 define i32 @f4(i32 %a, i32 %b) {
 ; CHECK: f4
-; CHECK: add r0, r0, #254, 28         @ encoding: [0xfe,0x0e,0x80,0xe2]
+; CHECK: add r0, r0, #254, #28        @ encoding: [0xfe,0x0e,0x80,0xe2]
 ; CHECK:                              @ 4064
 ; CHECK: bx lr                        @ encoding: [0x1e,0xff,0x2f,0xe1]
   %add = add nsw i32 %a, 4064
@@ -118,7 +118,7 @@ define i32 @f12(i32 %a) {
 define i64 @f13() {
 ; CHECK: f13:
 ; CHECK: mvn r0, #0                   @ encoding: [0x00,0x00,0xe0,0xe3]
-; CHECK: mvn r1, #2, 2                @ encoding: [0x02,0x11,0xe0,0xe3]
+; CHECK: mvn r1, #2, #2               @ encoding: [0x02,0x11,0xe0,0xe3]
         ret i64 9223372036854775807
 }
 
@@ -229,7 +229,7 @@ define i32 @f23(i32 %X, i32 %Y) {
 
 define void @f24(i32 %a) {
 ; CHECK: f24
-; CHECK: cmp r0, #1, 16               @ encoding: [0x01,0x08,0x50,0xe3]
+; CHECK: cmp r0, #1, #16               @ encoding: [0x01,0x08,0x50,0xe3]
         %b = icmp ugt i32 %a, 65536
         br i1 %b, label %r, label %r
 r:
diff --git a/test/MC/ARM/thumb2.s b/test/MC/ARM/thumb2.s
index cd09311..5342b90 100644
--- a/test/MC/ARM/thumb2.s
+++ b/test/MC/ARM/thumb2.s
@@ -284,3 +284,19 @@
 @ CHECK: msr  cpsr_fsxc, r0 @ encoding: [0x80,0xf3,0x00,0x8f]
   msr  cpsr_fsxc, r0
 
+@ CHECK: strexb  r0, r1, [r2] @ encoding: [0xc2,0xe8,0x40,0x1f]
+  strexb  r0, r1, [r2]
+@ CHECK: strexh  r0, r1, [r2] @ encoding: [0xc2,0xe8,0x50,0x1f]
+  strexh  r0, r1, [r2]
+@ CHECK: strex  r0, r1, [r2] @ encoding: [0x42,0xe8,0x00,0x10]
+  strex  r0, r1, [r2]
+@ CHECK: strexd  r0, r2, r3, [r1] @ encoding: [0xc1,0xe8,0x70,0x23]
+  strexd  r0, r2, r3, [r1]
+@ CHECK: ldrexb  r0, [r0] @ encoding: [0xd0,0xe8,0x4f,0x0f]
+  ldrexb  r0, [r0]
+@ CHECK: ldrexh  r0, [r0] @ encoding: [0xd0,0xe8,0x5f,0x0f]
+  ldrexh  r0, [r0]
+@ CHECK: ldrex  r0, [r0] @ encoding: [0x50,0xe8,0x00,0x0f]
+  ldrex  r0, [r0]
+@ CHECK: ldrexd  r0, r1, [r0] @ encoding: [0xd0,0xe8,0x7f,0x01]
+  ldrexd  r0, r1, [r0]
diff --git a/test/MC/AsmParser/directive_space.s b/test/MC/AsmParser/directive_space.s
index e6353a4..fc5aeb4 100644
--- a/test/MC/AsmParser/directive_space.s
+++ b/test/MC/AsmParser/directive_space.s
@@ -9,3 +9,8 @@ TEST0:
 # CHECK: .space	2,3
 TEST1:  
         .space 2, 3
+
+# CHECK: TEST2:
+# CHECK: .space 1
+TEST2:
+        .skip 1
diff --git a/test/MC/AsmParser/dot-symbol.s b/test/MC/AsmParser/dot-symbol.s
new file mode 100644
index 0000000..4a38a40
--- /dev/null
+++ b/test/MC/AsmParser/dot-symbol.s
@@ -0,0 +1,12 @@
+# Historically 'as' treats '.' as a reference to the current location in
+# arbitrary contects. We don't support this in general.
+
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t
+# RUN: FileCheck -input-file %t %s
+
+# CHECK: assignment to pseudo-symbol '.' is unsupported (use '.space' or '.org').
+. = . + 8
+
+# CHECK: invalid use of pseudo-symbol '.' as a label
+.:
+        .long 0
diff --git a/test/MC/AsmParser/exprs-invalid.s b/test/MC/AsmParser/exprs-invalid.s
index 5358fc5..dc27d80 100644
--- a/test/MC/AsmParser/exprs-invalid.s
+++ b/test/MC/AsmParser/exprs-invalid.s
@@ -1,13 +1,8 @@
-// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t
-// RUN: FileCheck -input-file %t %s
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+// CHECK: 	.section	__TEXT,__text,regular,pure_instructions
+// CHECK-ERRORS: error: invalid octal number
+.long 80+08
 
-// Currently XFAIL'ed, since the front-end isn't validating this. Figure out the
-// right resolution.
-//
-// XFAIL: *
-
-        .text
-a:
-        .data
-// CHECK: expected relocatable expression
-        .long -(0 + a)
+// CHECK-ERRORS: error: invalid hexadecimal number
+.long 80+0xzz
diff --git a/test/MC/AsmParser/floating-literals.s b/test/MC/AsmParser/floating-literals.s
index bd122a8..d44bb98 100644
--- a/test/MC/AsmParser/floating-literals.s
+++ b/test/MC/AsmParser/floating-literals.s
@@ -6,6 +6,15 @@
 # CHECK: .long	1082549862
 .single 1.2455, +2.3, 3, + 4.2
 
+# CHECK: .long	2139095040
+.single InFinIty
+
+# CHECK: .long	4286578688
+.single -iNf
+
+# CHECK: .long	2147483647
+.single nAN
+
 # CHECK: .long  1067928519
 .float 1.307
         
diff --git a/test/MC/AsmParser/rename.s b/test/MC/AsmParser/rename.s
index 64ca515..934cee8 100644
--- a/test/MC/AsmParser/rename.s
+++ b/test/MC/AsmParser/rename.s
@@ -1,10 +1,14 @@
 // RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
 
         .size bar, . - bar
+.Ltmp01:
+       .size foo, .Ltmp01 - foo
 .Ltmp0:
-       .size foo, .Ltmp0 - foo
+       .size qux, .Ltmp0 - qux
 
 // CHECK: .Ltmp0:
 // CHECK: .size  bar, .Ltmp0-bar
 // CHECK: .Ltmp01
 // CHECK: .size foo, .Ltmp01-foo
+// CHECK: .Ltmp02
+// CHECK: .size qux, .Ltmp02-qux
diff --git a/test/MC/AsmParser/section.s b/test/MC/AsmParser/section.s
index 414fc6d..5abacc7 100644
--- a/test/MC/AsmParser/section.s
+++ b/test/MC/AsmParser/section.s
@@ -45,7 +45,7 @@
 .previous
 .byte 1
 .previous
-# CHECK:       (('sh_name', 0x00000012) # 'test1'
+# CHECK:       (('sh_name', 0x00000044) # 'test1'
 # CHECK-NEXT:   ('sh_type', 0x00000001)
 # CHECK-NEXT:   ('sh_flags', 0x00000000)
 # CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -57,7 +57,7 @@
 # CHECK-NEXT:   ('sh_entsize', 0x00000000)
 # CHECK-NEXT:   ('_section_data', '01010101 010101')
 # CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000018) # 'test2'
+# CHECK:       (('sh_name', 0x0000003e) # 'test2'
 # CHECK-NEXT:   ('sh_type', 0x00000001)
 # CHECK-NEXT:   ('sh_flags', 0x00000000)
 # CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -69,7 +69,7 @@
 # CHECK-NEXT:   ('sh_entsize', 0x00000000)
 # CHECK-NEXT:   ('_section_data', '02020202 0202')
 # CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000001e) # 'test3'
+# CHECK:       (('sh_name', 0x00000038) # 'test3'
 # CHECK-NEXT:   ('sh_type', 0x00000001)
 # CHECK-NEXT:   ('sh_flags', 0x00000000)
 # CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -81,7 +81,7 @@
 # CHECK-NEXT:   ('sh_entsize', 0x00000000)
 # CHECK-NEXT:   ('_section_data', '03030303 03')
 # CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000024) # 'test4'
+# CHECK:       (('sh_name', 0x00000032) # 'test4'
 # CHECK-NEXT:   ('sh_type', 0x00000001)
 # CHECK-NEXT:   ('sh_flags', 0x00000000)
 # CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -93,7 +93,7 @@
 # CHECK-NEXT:   ('sh_entsize', 0x00000000)
 # CHECK-NEXT:   ('_section_data', '040404')
 # CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000002a) # 'test5'
+# CHECK:       (('sh_name', 0x0000002c) # 'test5'
 # CHECK-NEXT:   ('sh_type', 0x00000001)
 # CHECK-NEXT:   ('sh_flags', 0x00000000)
 # CHECK-NEXT:   ('sh_addr', 0x00000000)
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 0c86582..23156b8 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -1,133 +1,133 @@
-// This test checks that the COFF object emitter works for the most basic
-// programs.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
-
-.def	 _main;
-	.scl	2;
-	.type	32;
-	.endef
-	.text
-	.globl	_main
-	.align	16, 0x90
-_main:                                  # @main
-# BB#0:                                 # %entry
-	subl	$4, %esp
-	movl	$L_.str, (%esp)
-	calll	_printf
-	xorl	%eax, %eax
-	addl	$4, %esp
-	ret
-
-	.data
-L_.str:                                 # @.str
-	.asciz	 "Hello World"
-
-// CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 6
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 2
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0300040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_4BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
-// CHECK:       Relocations              = None
-// CHECK:     }
-// CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-// CHECK:     }
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-// CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// This test checks that the COFF object emitter works for the most basic
+// programs.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$L_.str, (%esp)
+	calll	_printf
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+L_.str:                                 # @.str
+	.asciz	 "Hello World"
+
+// CHECK: {
+// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   NumberOfSections         = 2
+// CHECK:   TimeDateStamp            = {{[0-9]+}}
+// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
+// CHECK:   NumberOfSymbols          = 6
+// CHECK:   SizeOfOptionalHeader     = 0
+// CHECK:   Characteristics          = 0x0
+// CHECK:   Sections                 = [
+// CHECK:     1 = {
+// CHECK:       Name                     = .text
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 2
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0x60500020
+// CHECK:         IMAGE_SCN_CNT_CODE
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_EXECUTE
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:       SectionData              =
+// CHECK:       Relocations              = [
+// CHECK:         0 = {
+// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         1 = {
+// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
+// CHECK:           SymbolTableIndex         = 5
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _printf
+// CHECK:         }
+// CHECK:       ]
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x0
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 0
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0xC0300040
+// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:         IMAGE_SCN_ALIGN_4BYTES
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:         IMAGE_SCN_MEM_WRITE
+// CHECK:       SectionData              =
+// CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
+// CHECK:       Relocations              = None
+// CHECK:     }
+// CHECK:   ]
+// CHECK:   Symbols                  = [
+// CHECK:     0 = {
+// CHECK:       Name                     = .text
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
+// CHECK:         00 00                                             |..|
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 2
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+// CHECK:         00 00                                             |..|
+// CHECK:     }
+// CHECK:     4 = {
+// CHECK:       Name                     = _main
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+// CHECK:     }
+// CHECK:     5 = {
+// CHECK:       Name                     = _printf
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+// CHECK:     }
+// CHECK:   ]
+// CHECK: }
diff --git a/test/MC/COFF/bss.s b/test/MC/COFF/bss.s
index f44225b..3bed13d 100644
--- a/test/MC/COFF/bss.s
+++ b/test/MC/COFF/bss.s
@@ -1,15 +1,15 @@
-// The purpose of this test is to verify that bss sections are emited correctly.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
-
-    .bss
-    .globl _g0
-    .align 4
-_g0:
-    .long 0
-
-// CHECK:      Name           = .bss
-// CHECK-NEXT: VirtualSize    = 0
-// CHECK-NEXT: VirtualAddress = 0
-// CHECK-NEXT: SizeOfRawData  = 4
+// The purpose of this test is to verify that bss sections are emited correctly.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+    .bss
+    .globl _g0
+    .align 4
+_g0:
+    .long 0
+
+// CHECK:      Name           = .bss
+// CHECK-NEXT: VirtualSize    = 0
+// CHECK-NEXT: VirtualAddress = 0
+// CHECK-NEXT: SizeOfRawData  = 4
diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
new file mode 100644
index 0000000..aa683f2
--- /dev/null
+++ b/test/MC/COFF/diff.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | coff-dump.py | FileCheck %s
+
+	.def	 _foobar;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.long   0
+	.globl	_foobar
+	.align	16, 0x90
+_foobar:                                # @foobar
+# BB#0:
+	ret
+
+	.data
+	.globl	_rust_crate             # @rust_crate
+	.align	4
+_rust_crate:
+	.long   0
+	.long   _foobar
+	.long	_foobar-_rust_crate
+	.long	_foobar-_rust_crate
+
+// CHECK:      Name                     = .data
+// CHECK:      SectionData              =
+// CHECK-NEXT:   00 00 00 00 00 00 00 00 - 1C 00 00 00 20 00 00 00 |............ ...|
+// CHECK:        Relocations              = [
+// CHECK-NEXT:   0 = {
+// CHECK-NEXT:     VirtualAddress           = 0x4
+// CHECK-NEXT:     SymbolTableIndex         =
+// CHECK-NEXT:     Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK-NEXT:     SymbolName               = _foobar
+// CHECK-NEXT:   }
+// CHECK-NEXT:   1 = {
+// CHECK-NEXT:     VirtualAddress           = 0x8
+// CHECK-NEXT:     SymbolTableIndex         = 0
+// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK-NEXT:     SymbolName               = .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   2 = {
+// CHECK-NEXT:     VirtualAddress           = 0xC
+// CHECK-NEXT:     SymbolTableIndex         = 0
+// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK-NEXT:     SymbolName               = .text
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
index f86f4a9..4c9b4d4 100644
--- a/test/MC/COFF/simple-fixups.s
+++ b/test/MC/COFF/simple-fixups.s
@@ -1,50 +1,50 @@
-// The purpose of this test is to verify that we do not produce unneeded
-// relocations when symbols are in the same section and we know their offset.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
-
-	.def	 _foo;
-	.scl	2;
-	.type	32;
-	.endef
-	.text
-	.globl	_foo
-	.align	16, 0x90
-_foo:                                   # @foo
-# BB#0:                                 # %e
-	.align	16, 0x90
-LBB0_1:                                 # %i
-                                        # =>This Inner Loop Header: Depth=1
-	jmp	LBB0_1
-
-	.def	 _bar;
-	.scl	2;
-	.type	32;
-	.endef
-	.globl	_bar
-	.align	16, 0x90
-_bar:                                   # @bar
-# BB#0:                                 # %e
-	.align	16, 0x90
-LBB1_1:                                 # %i
-                                        # =>This Inner Loop Header: Depth=1
-	jmp	LBB1_1
-
-	.def	 _baz;
-	.scl	2;
-	.type	32;
-	.endef
-	.globl	_baz
-	.align	16, 0x90
-_baz:                                   # @baz
-# BB#0:                                 # %e
-	subl	$4, %esp
-Ltmp0:
-	calll	_baz
-	addl	$4, %esp
-	ret
-
-// CHECK:     Sections = [
-// CHECK-NOT: NumberOfRelocations = {{[^0]}}
-// CHECK:     Symbols = [
+// The purpose of this test is to verify that we do not produce unneeded
+// relocations when symbols are in the same section and we know their offset.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+	.def	 _foo;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_foo
+	.align	16, 0x90
+_foo:                                   # @foo
+# BB#0:                                 # %e
+	.align	16, 0x90
+LBB0_1:                                 # %i
+                                        # =>This Inner Loop Header: Depth=1
+	jmp	LBB0_1
+
+	.def	 _bar;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	_bar
+	.align	16, 0x90
+_bar:                                   # @bar
+# BB#0:                                 # %e
+	.align	16, 0x90
+LBB1_1:                                 # %i
+                                        # =>This Inner Loop Header: Depth=1
+	jmp	LBB1_1
+
+	.def	 _baz;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	_baz
+	.align	16, 0x90
+_baz:                                   # @baz
+# BB#0:                                 # %e
+	subl	$4, %esp
+Ltmp0:
+	calll	_baz
+	addl	$4, %esp
+	ret
+
+// CHECK:     Sections = [
+// CHECK-NOT: NumberOfRelocations = {{[^0]}}
+// CHECK:     Symbols = [
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index ede6b53..03f07b2 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -1,62 +1,62 @@
-// The purpose of this test is to verify that symbol aliases
-// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
-// They should be identical except for the name.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
-
-	.def	 _foo;
-	.scl	2;
-	.type	32;
-	.endef
-	.text
-	.globl	_foo
-	.align	16, 0x90
-_foo:                                   # @foo
-# BB#0:                                 # %entry
-	ret
-
-	.data
-	.globl	_bar                    # @bar
-	.align	4
-_bar:
-	.long	0                       # 0x0
-
-
-	.globl	_foo_alias
-_foo_alias = _foo
-	.globl	_bar_alias
-_bar_alias = _bar
-
-// CHECK:      Name               = {{_?}}foo
-// CHECK-NEXT: Value              = [[FOO_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
-
-// CHECK:      Name               = {{_?}}bar
-// CHECK-NEXT: Value              = [[BAR_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
-
-// CHECK:      Name               = {{_?}}foo_alias
-// CHECK-NEXT: Value              = [[FOO_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
-
-// CHECK:      Name               = {{_?}}bar_alias
-// CHECK-NEXT: Value              = [[BAR_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
-
+// The purpose of this test is to verify that symbol aliases
+// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
+// They should be identical except for the name.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+	.def	 _foo;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_foo
+	.align	16, 0x90
+_foo:                                   # @foo
+# BB#0:                                 # %entry
+	ret
+
+	.data
+	.globl	_bar                    # @bar
+	.align	4
+_bar:
+	.long	0                       # 0x0
+
+
+	.globl	_foo_alias
+_foo_alias = _foo
+	.globl	_bar_alias
+_bar_alias = _bar
+
+// CHECK:      Name               = {{_?}}foo
+// CHECK-NEXT: Value              = [[FOO_VALUE:.*$]]
+// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
+
+// CHECK:      Name               = {{_?}}bar
+// CHECK-NEXT: Value              = [[BAR_VALUE:.*$]]
+// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
+
+// CHECK:      Name               = {{_?}}foo_alias
+// CHECK-NEXT: Value              = [[FOO_VALUE]]
+// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
+
+// CHECK:      Name               = {{_?}}bar_alias
+// CHECK-NEXT: Value              = [[BAR_VALUE]]
+// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index c314ac2..1df8baa 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -1,187 +1,187 @@
-// The purpose of this test is to see if the COFF object writer is emitting the
-// proper relocations for multiple pieces of data in a single data fragment.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
-
-.def	 _main;
-	.scl	2;
-	.type	32;
-	.endef
-	.text
-	.globl	_main
-	.align	16, 0x90
-_main:                                  # @main
-# BB#0:                                 # %entry
-	subl	$4, %esp
-	movl	$L_.str0, (%esp)
-	calll	_printf
-	movl	$L_.str1, (%esp)
-	calll	_puts
-	movl	$L_.str2, (%esp)
-	calll	_puts
-	xorl	%eax, %eax
-	addl	$4, %esp
-	ret
-
-	.data
-L_.str0:                                # @.str0
-	.asciz	 "Hello "
-
-L_.str1:                                # @.str1
-	.asciz	 "World!"
-
-	.align	16                      # @.str2
-L_.str2:
-	.asciz	 "I'm The Last Line."
-
-// CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 7
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 6
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
-// CHECK:         04 24 07 00 00 00 E8 00 - 00 00 00 C7 04 24 10 00 |.$...........$..|
-// CHECK:         00 00 E8 00 00 00 00 31 - C0 83 C4 04 C3 |.......1.....|
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x6
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0xB
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:         2 = {
-// CHECK:           VirtualAddress           = 0x12
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         3 = {
-// CHECK:           VirtualAddress           = 0x17
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:         4 = {
-// CHECK:           VirtualAddress           = 0x1E
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         5 = {
-// CHECK:           VirtualAddress           = 0x23
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0500040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 00 00 |Hello .World!...|
-// CHECK:         49 27 6D 20 54 68 65 20 - 4C 61 73 74 20 4C 69 6E |I'm The Last Lin|
-// CHECK:         65 2E 00                                          |e..|
-// CHECK:       Relocations              = None
-// CHECK:     }
-// CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         2D 00 00 00 06 00 00 00 - 00 00 00 00 01 00 00 00 |-...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         23 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |#...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
-// CHECK:     }
-// CHECK:     6 = {
-// CHECK:       Name                     = _puts
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
-// CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// The purpose of this test is to see if the COFF object writer is emitting the
+// proper relocations for multiple pieces of data in a single data fragment.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$L_.str0, (%esp)
+	calll	_printf
+	movl	$L_.str1, (%esp)
+	calll	_puts
+	movl	$L_.str2, (%esp)
+	calll	_puts
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+L_.str0:                                # @.str0
+	.asciz	 "Hello "
+
+L_.str1:                                # @.str1
+	.asciz	 "World!"
+
+	.align	16                      # @.str2
+L_.str2:
+	.asciz	 "I'm The Last Line."
+
+// CHECK: {
+// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   NumberOfSections         = 2
+// CHECK:   TimeDateStamp            = {{[0-9]+}}
+// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
+// CHECK:   NumberOfSymbols          = 7
+// CHECK:   SizeOfOptionalHeader     = 0
+// CHECK:   Characteristics          = 0x0
+// CHECK:   Sections                 = [
+// CHECK:     1 = {
+// CHECK:       Name                     = .text
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 6
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0x60500020
+// CHECK:         IMAGE_SCN_CNT_CODE
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_EXECUTE
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:       SectionData              =
+// CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+// CHECK:         04 24 07 00 00 00 E8 00 - 00 00 00 C7 04 24 10 00 |.$...........$..|
+// CHECK:         00 00 E8 00 00 00 00 31 - C0 83 C4 04 C3 |.......1.....|
+// CHECK:       Relocations              = [
+// CHECK:         0 = {
+// CHECK:           VirtualAddress           = 0x6
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         1 = {
+// CHECK:           VirtualAddress           = 0xB
+// CHECK:           SymbolTableIndex         = 5
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _printf
+// CHECK:         }
+// CHECK:         2 = {
+// CHECK:           VirtualAddress           = 0x12
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         3 = {
+// CHECK:           VirtualAddress           = 0x17
+// CHECK:           SymbolTableIndex         = 6
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _puts
+// CHECK:         }
+// CHECK:         4 = {
+// CHECK:           VirtualAddress           = 0x1E
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         5 = {
+// CHECK:           VirtualAddress           = 0x23
+// CHECK:           SymbolTableIndex         = 6
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _puts
+// CHECK:         }
+// CHECK:       ]
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x0
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 0
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0xC0500040
+// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:         IMAGE_SCN_MEM_WRITE
+// CHECK:       SectionData              =
+// CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 00 00 |Hello .World!...|
+// CHECK:         49 27 6D 20 54 68 65 20 - 4C 61 73 74 20 4C 69 6E |I'm The Last Lin|
+// CHECK:         65 2E 00                                          |e..|
+// CHECK:       Relocations              = None
+// CHECK:     }
+// CHECK:   ]
+// CHECK:   Symbols                  = [
+// CHECK:     0 = {
+// CHECK:       Name                     = .text
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         2D 00 00 00 06 00 00 00 - 00 00 00 00 01 00 00 00 |-...............|
+// CHECK:         00 00                                             |..|
+
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 2
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         23 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |#...............|
+// CHECK:         00 00                                             |..|
+
+// CHECK:     }
+// CHECK:     4 = {
+// CHECK:       Name                     = _main
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     5 = {
+// CHECK:       Name                     = _printf
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     }
+// CHECK:     6 = {
+// CHECK:       Name                     = _puts
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     }
+// CHECK:   ]
+// CHECK: }
diff --git a/test/MC/COFF/weak.s b/test/MC/COFF/weak.s
index a240d71..0f99313 100644
--- a/test/MC/COFF/weak.s
+++ b/test/MC/COFF/weak.s
@@ -1,51 +1,51 @@
-// This tests that default-null weak symbols (a GNU extension) are created
-// properly via the .weak directive.
-
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 < %s | coff-dump.py | FileCheck %s
-
-    .def    _main;
-    .scl    2;
-    .type   32;
-    .endef
-    .text
-    .globl  _main
-    .align  16, 0x90
-_main:                                  # @main
-# BB#0:                                 # %entry
-    subl    $4, %esp
-    movl    $_test_weak, %eax
-    testl   %eax, %eax
-    je      LBB0_2
-# BB#1:                                 # %if.then
-    calll   _test_weak
-    movl    $1, %eax
-    addl    $4, %esp
-    ret
-LBB0_2:                                 # %return
-    xorl    %eax, %eax
-    addl    $4, %esp
-    ret
-
-    .weak   _test_weak
-
-// CHECK: Symbols = [
-
-// CHECK:      Name               = _test_weak
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 0
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_WEAK_EXTERNAL (105)
-// CHECK-NEXT: NumberOfAuxSymbols = 1
-// CHECK-NEXT: AuxillaryData      =
-// CHECK-NEXT: 05 00 00 00 02 00 00 00 - 00 00 00 00 00 00 00 00 |................|
-// CHECK-NEXT: 00 00                                             |..|
-
-// CHECK:      Name               = .weak._test_weak.default
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 65535
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK-NEXT: NumberOfAuxSymbols = 0
-// CHECK-NEXT: AuxillaryData      =
+// This tests that default-null weak symbols (a GNU extension) are created
+// properly via the .weak directive.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 < %s | coff-dump.py | FileCheck %s
+
+    .def    _main;
+    .scl    2;
+    .type   32;
+    .endef
+    .text
+    .globl  _main
+    .align  16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+    subl    $4, %esp
+    movl    $_test_weak, %eax
+    testl   %eax, %eax
+    je      LBB0_2
+# BB#1:                                 # %if.then
+    calll   _test_weak
+    movl    $1, %eax
+    addl    $4, %esp
+    ret
+LBB0_2:                                 # %return
+    xorl    %eax, %eax
+    addl    $4, %esp
+    ret
+
+    .weak   _test_weak
+
+// CHECK: Symbols = [
+
+// CHECK:      Name               = _test_weak
+// CHECK-NEXT: Value              = 0
+// CHECK-NEXT: SectionNumber      = 0
+// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
+// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_WEAK_EXTERNAL (105)
+// CHECK-NEXT: NumberOfAuxSymbols = 1
+// CHECK-NEXT: AuxillaryData      =
+// CHECK-NEXT: 05 00 00 00 02 00 00 00 - 00 00 00 00 00 00 00 00 |................|
+// CHECK-NEXT: 00 00                                             |..|
+
+// CHECK:      Name               = .weak._test_weak.default
+// CHECK-NEXT: Value              = 0
+// CHECK-NEXT: SectionNumber      = 65535
+// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
+// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK-NEXT: NumberOfAuxSymbols = 0
+// CHECK-NEXT: AuxillaryData      =
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 0f6aeb7..ade2952 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -1,7 +1,16 @@
 # RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
 
+# CHECK:	addpl	r4, pc, #19, #8
+0x4c 0x45 0x8f 0x52
+
 # CHECK:	b	#0
-0xfe 0xff 0xff 0xea
+0x00 0x00 0x00 0xea
+
+# CHECK:	bl	#7732
+0x8d 0x07 0x00 0xeb
+
+# CHECK:	bleq	#-4
+0xff 0xff 0xff 0x0b
 
 # CHECK:	bfc	r8, #0, #16
 0x1f 0x80 0xcf 0xe7
@@ -12,6 +21,12 @@
 # CHECK:	mov	pc, lr
 0x0e 0xf0 0xa0 0xe1
 
+# CHECK:	mov	pc, #255, #2
+0xff 0xf1 0xa0 0xe3
+
+# CHECK:	movw	r7, #4096
+0x00 0x70 0x01 0xe3
+
 # CHECK:	cmn	r0, #1
 0x01 0x00 0x70 0xe3
 
@@ -36,6 +51,9 @@
 # CHECK:	ldr	r0, [r2], #15
 0x0f 0x00 0x92 0xe4
 
+# CHECK:	ldr	r5, [r7, -r10, lsl #2]
+0x0a 0x51 0x17 0xe7
+
 # CHECK:	ldrh	r0, [r2], #0
 0xb0 0x00 0xd2 0xe0
 
@@ -54,7 +72,7 @@
 # CHECK:	movt	r8, #65535
 0xff 0x8f 0x4f 0xe3
 
-# CHECK:	mvnspl	r7, #245, 2
+# CHECK:	mvnspl	r7, #245, #2
 0xf5 0x71 0xf0 0x53
 
 # CHECK-NOT:	orr	r7, r8, r7, rrx #0
@@ -64,9 +82,12 @@
 # CHECK:	pkhbt	r8, r9, r10, lsl #4
 0x1a 0x82 0x89 0xe6
 
-# CHECK-NOT:	pkhbtls	pc, r11, r11, lsl #0
-# CHECK:	pkhbtls	pc, r11, r11
-0x1b 0xf0 0x8b 0x96
+# CHECK-NOT:	pkhbtls	r10, r11, r11, lsl #0
+# CHECK:	pkhbtls	r10, r11, r11
+0x1b 0xa0 0x8b 0x96
+
+# CHECK:	pkhtbmi	lr, r1, r6, asr #21
+0xd6 0xea 0x81 0x46
 
 # CHECK:	pop	{r0, r2, r4, r6, r8, r10}
 0x55 0x05 0xbd 0xe8
@@ -130,3 +151,136 @@
 
 # CHECK: msr cpsr_fc, r0
 0x00 0xf0 0x29 0xe1
+
+# CHECK:	msrmi	cpsr_c, #241, #8
+0xf1 0xf4 0x21 0x43
+
+# CHECK: rsbs r6, r7, r8
+0x08 0x60 0x77 0xe0
+
+# CHECK: blxeq r5
+0x35 0xff 0x2f 0x01
+
+# CHECK: bx r12
+0x1c 0xff 0x2f 0xe1
+
+# CHECK:	uqadd16mi	r6, r11, r8
+0x18 0x60 0x6b 0x46
+
+# CHECK:	str	r0, [sp, #4]
+0x04 0x00 0x8d 0xe5
+
+# CHECK:	str	r1, [sp]
+0x00 0x10 0x8d 0xe5
+
+# CHECK:	ldr	r3, [pc, #144]
+0x90 0x30 0x9f 0xe5
+
+# CHECK:	ldr	r3, [r0, #-4]
+0x4 0x30 0x10 0xe5
+
+# CHECK:	ldr	r5, [sp, r0, lsl #1]!
+0x80 0x50 0xbd 0xe7
+
+# CHECK:	ldr	r5, [r7], -r0, lsr #2
+0x20 0x51 0x17 0xe6
+
+# CHECK:	strdeq	r2, r3, [r0], -r8
+0xf8 0x24 0x00 0x00
+
+# CHECK:	ldrdeq	r2, r3, [r0], -r12
+0xdc 0x24 0x00 0x00
+
+# CHECK:	ldrbt	r3, [r4], -r5, lsl #12
+0x05 0x36 0x74 0xe6
+
+# CHECK:	vcmpe.f64	d8, #0
+0xc0 0x8b 0xb5 0xee
+
+# CHECK:	vldmdb	r2!, {s7, s8, s9, s10, s11}
+0x05 0x3a 0x72 0xed
+
+# CHECK:	vldr.32	s23, [r2, #660]
+0xa5 0xba 0xd2 0xed
+
+# CHECK:	strtvc	r5, [r3], r0, lsr #20
+0x20 0x5a 0xa3 0x76
+
+# CHECK:	stmiblo	sp, {r0, r4, r8, r11, r12, pc}
+0x11 0x99 0x8d 0x39
+
+# CHECK:	ldmdb	sp, {r0, r4, r8, r11, r12, pc}
+0x11 0x99 0x1d 0xe9
+
+# CHECK:	swpge	r3, r2, [r6]
+0x92 0x30 0x06 0xa1
+
+# CHECK:	umull	r1, r2, r3, r4
+0x93 0x14 0x82 0xe0
+
+# CHECK:	pld	[pc, #-0]
+0x00 0xf0 0x1f 0xf5
+
+# CHECK:	pli	[pc, #-0]
+0x00 0xf0 0x5f 0xf4
+
+# CHECK:	pli	[r3, r1, lsl #2]
+0x01 0xf1 0xd3 0xf6
+
+# CHECK:	stc	p2, cr4, [r9], {157}
+0x9d 0x42 0x89 0xec
+
+# CHECK:	stc2	p2, cr4, [r9], {157}
+0x9d 0x42 0x89 0xfc
+
+# CHECK:	blx	#60
+0x0f 0x00 0x00 0xfa
+
+# CHECK-NOT:	adcs	r10, r8, r0, asr #6
+# CHECK:	adcshi	r10, r8, r0, asr #6
+0x40 0xa3 0xb8 0x80
+
+# CHECK:	adcshi	r10, r8, r0, asr r3
+0x50 0xa3 0xb8 0x80
+
+# CHECK:	streq	r1, [sp], #-1567
+0x1f 0x16 0xd 0x4
+
+# CHECK:	mrchs	p2, #3, r11, c13, c6, #6
+0xd6 0xb2 0x7d 0x2e
+
+# CHECK:	smlsldx	r4, r12, r11, r4
+0x7b 0x44 0x4c 0xe7
+
+# CHECK:	lsl	r3, r2, r1
+0x12 0x31 0xa0 0xe1
+
+# CHECK:	sxtab	r9, r8, r5
+0x75 0x90 0xa8 0xe6
+
+# CHECK:	sxtb	r9, r5, ror #8
+0x75 0x94 0xaf 0xe6
+
+# CHECK:	bfc	r5, #0, #16
+0x1f 0x50 0xcf 0xe7
+
+# CHECK:	bfi	r5, r6, #0, #16
+0x16 0x50 0xcf 0xe7
+
+# CHECK:	sbfx	r5, r6, #8, #8
+0x56 0x54 0xa7 0xe7
+
+# CHECK:	rsb	pc, r5, r0
+0x00 0xf0 0x65 0xe0
+
+# CHECK:	uqadd8	r5, r6, r7
+0x97 0x5f 0x66 0xe6
+
+# CHECK:	uqsax	r5, r6, r7
+0x57 0x5f 0x66 0xe6
+
+# CHECK:	smmlareq	r0, r0, r0, r0
+0x30 0x00 0x50 0x07
+
+# CHECK:	nop
+0x00 0xf0 0x20 0xe3
diff --git a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
new file mode 100644
index 0000000..ca0c1ab
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=60 Name=BFI Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 1| 0: 1: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == 15 then UNPREDICTABLE;
+0x16 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
new file mode 100644
index 0000000..66c43c2
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if cond = '1110' then UNDEFINED
+0x6f 0xde
diff --git a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
new file mode 100644
index 0000000..10748e9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# invalid imod value (0b01)
+0xc0 0x67 0x4 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
new file mode 100644
index 0000000..5202217
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# invalid (imod, M, iflags) combination
+0x93 0x1c 0x02 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
new file mode 100644
index 0000000..0a4be68
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1908 Name=t2DMB Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 1: 1| 1: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 1| 0: 0: 0: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# Inst{3-0} encodes the option: SY, ST, ISH, ISHST, NSH, NSHST, OSH, OSHST.
+# Reject invalid encodings.
+#
+# See also A8.6.42 DSB
+# All other encodings of option are reserved. It is IMPLEMENTATION DEFINED whether options
+# other than SY are implemented. All unsupported and reserved options must execute as a full
+# system DSB operation, but software must not rely on this behavior.
+0xbf 0xf3 0x51 0x8f
diff --git a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
new file mode 100644
index 0000000..afa2baf
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=102 Name=DSB Format=ARM_FORMAT_MISCFRM(26)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 1| 0: 1: 1: 1| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 1: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# Inst{3-0} encodes the option: SY, ST, ISH, ISHST, NSH, NSHST, OSH, OSHST.
+# Reject invalid encodings.
+#
+# See also A8.6.42 DSB
+# All other encodings of option are reserved. It is IMPLEMENTATION DEFINED whether options
+# other than SY are implemented. All unsupported and reserved options must execute as a full
+# system DSB operation, but software must not rely on this behavior.
+0x40 0xf0 0x7f 0xf5
diff --git a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
new file mode 100644
index 0000000..b966a9d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 0: 1| 1: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 1: 0: 1: 1| 0: 1: 0: 0| 1: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# The bytes have 0b0000 for P,U,D,W; from A8.6.51, it is undefined.
+0x92 0xb4 0x1f 0xdc
+
diff --git a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
new file mode 100644
index 0000000..7a35c2d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 0: 1| 0: 1: 1: 1| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if wback && (n == 15 || n == t) then UNPREDICTABLE
+0x05 0x70 0xd7 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
new file mode 100644
index 0000000..da2e6be
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1930 Name=t2LDRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 1| 1: 1: 1: 1| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# A8.6.66 LDRD (immediate)
+# if Rn = '1111' then SEE LDRD (literal)
+# A8.6.67 LDRD (literal)
+# Inst{21} = 0
+0xff 0xe9 0x0 0xeb
diff --git a/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
new file mode 100644
index 0000000..fb2ce20
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 0: 1: 1| 0: 1: 1: 1| 0: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# The bytes have Inst{4} = 1, so it's not an LDRT Encoding A2 instruction.
+0x10 0x51 0x37 0xe6
+
+
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
new file mode 100644
index 0000000..ad79986
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# LDR_PRE/POST has encoding Inst{4} = 0.
+0xde 0x69 0x18 0x46
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
new file mode 100644
index 0000000..36c1124
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=165 Name=LDR_PRE Format=ARM_FORMAT_LDFRM(6)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 1| 0: 1: 1: 1| 0: 1: 1: 0| 0: 0: 0: 0| 1: 0: 0: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if m == 15 then UNPREDICTABLE
+0x8f 0x60 0xb7 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
new file mode 100644
index 0000000..23a0b85
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# LDR (register) has encoding Inst{4} = 0.
+0xba 0xae 0x9f 0x57
diff --git a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt b/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
new file mode 100644
index 0000000..20293ad
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.89 LSL (register)
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
new file mode 100644
index 0000000..d39b9c1
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=171 Name=MCR Format=ARM_FORMAT_BRFRM(2)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C
+0x1b 0x1b 0xa0 0x2e
diff --git a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
new file mode 100644
index 0000000..0b8a077
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=185 Name=MOVTi16 Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 0: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if d == 15 then UNPREDICTABLE
+0x00 0xf0 0x41 0xe3
diff --git a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
new file mode 100644
index 0000000..f82d3cb
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# To qualify as a MOV (register) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
+# The instruction is UNPREDICTABLE, and is not a valid intruction.
+#
+# See also
+# A8.6.97 MOV (register)
+0x2 0xd0 0xbc 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
new file mode 100644
index 0000000..3165ff7
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
@@ -0,0 +1,9 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 0: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 0: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 1| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# A8.6.89 LSL (register): Inst{7-4} = 0b0001
+0x93 0x42 0xa0 0xd1
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
new file mode 100644
index 0000000..cfbba43
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# To qualify as an LSL (immediate) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
+# The instruction is UNPREDICTABLE, and is not a valid intruction.
+#
+# See also
+# A8.6.88 LSL (immediate)
+# A8.6.98 MOV (shifted register), and
+# I.1 Instruction encoding diagrams and pseudocode
+0x2 0xd1 0xbc 0xf1
+
+
diff --git a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
new file mode 100644
index 0000000..e9d5deb
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=206 Name=MSRi Format=ARM_FORMAT_BRFRM(2)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 1: 1| 0: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+# The hints instructions have more specific encodings, so if mask == 0,
+# we should reject this as an invalid instruction.
+0xa7 0xf1 0x20 0x3
diff --git a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
new file mode 100644
index 0000000..1fdfa82
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=134 Name=LDMIA Format=ARM_FORMAT_LDSTMULFRM(10)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 0: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 0: 0: 1: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# B6.1.8 RFE has Inst{15-0} as 0x0a00 ==> Not an RFE instruction
+# A8.6.53 LDM/LDMIA/LDMFD is predicated with Inst{31-28} as cond ==> Not an LDMIA instruction
+0x32 0xb1 0x99 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt b/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
new file mode 100644
index 0000000..e7992ae
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
@@ -0,0 +1,9 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
new file mode 100644
index 0000000..1ecd87d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=271 Name=SBFX Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 0| 0: 1: 1: 1| 0: 1: 0: 1| 0: 1: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == 15 || n == 15 then UNPREDICTABLE;
+0x5f 0x54 0xa7 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
new file mode 100644
index 0000000..c3dcf83
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=284 Name=SMLAD Format=ARM_FORMAT_MULFRM(1)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 0: 0: 1| 0: 1: 1: 1| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 1: 0| 1: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.167
+# if d == 15 || n == 15 | m == 15 then UNPREDICTABLE
+0x1b 0x68 0xf 0x97
diff --git a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
new file mode 100644
index 0000000..fdca9f9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 0| 0: 1: 0: 1| 0: 0: 0: 1| 1: 1: 0: 0| 1: 0: 0: 0| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# Unknown format
+#
+# B6.1.10 SRS
+# Inst{19-8} = 0xd05
+# Inst{7-5} = 0b000
+0x83 0x1c 0xc5 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt b/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
new file mode 100644
index 0000000..9cc8351
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.183 SSAT
+# if d == 15 || n == 15 then UNPREDICTABLE;
+0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
new file mode 100644
index 0000000..0000c60
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2313 Name=tSTMIA_UPD Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 0| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if BitCount(registers) < 1 then UNPREDICTABLE
+0x00 0xc7
diff --git a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt b/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
new file mode 100644
index 0000000..5209323
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if t == 15 then UNPREDICTABLE
+0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
new file mode 100644
index 0000000..4ec681d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=390 Name=SXTBr_rot Format=ARM_FORMAT_EXTFRM(14)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 1: 0: 0| 0: 1: 1: 1| 0: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.223 SXTB
+# if d == 15 || m == 15 then UNPREDICTABLE;
+0x75 0xf4 0xaf 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
new file mode 100644
index 0000000..7a3ef33
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=419 Name=UMAAL Format=ARM_FORMAT_MULFRM(1)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.244 UMAAL
+# if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+0x98 0xbf 0x4f 0xf0 
diff --git a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt b/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
new file mode 100644
index 0000000..d3f508a
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 0: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# DPFrm with bad reg specifier(s)
+#
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+0x9f 0x5f 0x66 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
new file mode 100644
index 0000000..56d9ad7
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1| 1: 1: 0: 0| 0: 0: 1: 1| 1: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# 'a' == 1 and data_size == 8 is invalid
+0x3d 0x3c 0xa0 0xf4
diff --git a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
new file mode 100644
index 0000000..5fd0251
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=871 Name=VLD3DUPd32_UPD Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 1: 0| 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# A8.6.315 VLD3 (single 3-element structure to all lanes)
+# The a bit must be encoded as 0.
+0xa2 0xf9 0x92 0x2e
diff --git a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
new file mode 100644
index 0000000..887b983
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# core registers out of range
+0xa5 0xba 0x72 0xed
diff --git a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
new file mode 100644
index 0000000..eed012b
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1225 Name=VQADDsv16i8 Format=ARM_FORMAT_N3Reg(37)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 1: 0| 0: 1: 0: 0| 0: 0: 0: 0| 1: 1: 1: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# Qm -> bit[0] == 0, otherwise UNDEFINED
+0xdb 0xe0 0x40 0xf2
diff --git a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
new file mode 100644
index 0000000..506250c
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1641 Name=VST2b32_UPD Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 0| 0: 0: 1: 1| 0: 0: 0: 0| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.393 VST2 (multiple 2-element structures)
+# type == '1001' and align == '11' ==> UNDEFINED
+0xb3 0x9 0x3 0xf4
diff --git a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
new file mode 100644
index 0000000..d0bc51e
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1894 Name=t2Bcc Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 1: 1| 1: 0: 1: 0| 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 1: 1| 0: 1: 0: 0| 0: 1: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# A8.6.16 B
+# if cond<3:1> == '111' then SEE "Related Encodings"
+0xaf 0xf7 0x44 0x8b
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
new file mode 100644
index 0000000..9befbd6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1922 Name=t2LDRBT Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 0: 0: 1| 0: 0: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# The unpriviledged Load/Store cannot have SP or PC as Rt.
+0x10 0xf8 0x3 0xfe
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
new file mode 100644
index 0000000..598efd1
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1934 Name=t2LDREXD Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 1| 0: 0: 1: 0| 1: 0: 0: 0| 1: 0: 0: 0| 0: 1: 1: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if t == t2 then UNPREDICTABLE
+0xd2 0xe8 0x7f 0x88
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
new file mode 100644
index 0000000..a501eb9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1953 Name=t2LDRSHi12 Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if Rt = '1111' then SEE "Unallocated memory hints"
+0xb3 0xf9 0xdf 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
new file mode 100644
index 0000000..f886a6f
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1954 Name=t2LDRSHi8 Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 1: 1| 0: 1: 0: 1| 1: 1: 1: 1| 1: 1: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if Rt == '1111' and PUW == '100' then SEE "Unallocated memory hints"
+0x35 0xf9 0x00 0xfc
diff --git a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
new file mode 100644
index 0000000..c8f8ec2
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2124 Name=t2STRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 1| 1: 1: 1: 0| 0: 1: 0: 0| 0: 1: 0: 0| 0: 1: 1: 0| 0: 0: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if wback && (n == t || n == t2) then UNPREDICTABLE
+0xe4 0xe9 0x02 0x46
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
new file mode 100644
index 0000000..35ea651
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2127 Name=t2STREXB Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if d == n || d == t then UNPREDICTABLE
+0xc2 0xe8 0x42 0x8f
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
new file mode 100644
index 0000000..9b0cf24
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2128 Name=t2STREXD Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 0: 1: 1: 1| 1: 0: 0: 0| 0: 1: 1: 1| 1: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+# 
+# if d == n || d == t || d == t2 then UNPREDICTABLE
+mc-input.txt:1:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
new file mode 100644
index 0000000..129a270
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=2137 Name=t2STR_POST Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 1: 1: 1: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# if Rn == '1111' then UNDEFINED
+0x4f 0xf8 0xff 0xeb
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index eb9adb7..cfb5949 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -21,6 +21,12 @@
 # CHECK:	vld4.8	{d4, d6, d8, d10}, [r2]
 0x0f 0x41 0x22 0xf4
 
+# CHECK:	vld1.32	{d3[], d4[]}, [r0, :32]!
+0xbd 0x3c 0xa0 0xf4
+
+# CHECK:	vld4.16	{d3[], d4[], d5[], d6[]}, [r0, :64]!
+0x7d 0x3f 0xa0 0xf4
+
 # CHECK:	vmov	d0, d15
 0x1f 0x01 0x2f 0xf2
 
@@ -59,3 +65,27 @@
 
 # CHECK: vmov.f64 d0, #5.000000e-01
 0x00 0x0b 0xb6 0xee
+
+# CHECK:	vpop	{d8}
+0x02 0x8b 0xbd 0xec
+
+# CHECK:	vorr.i32	q15, #0x4F0000
+0x5f 0xe5 0xc4 0xf2
+
+# CHECK:	vbic.i32	q2, #0xA900
+0x79 0x43 0x82 0xf3
+
+# CHECK:	vst2.32	{d16, d18}, [r2, :64], r2
+0x92 0x9 0x42 0xf4
+
+# CHECK:	vmov.s8	r0, d8[1]
+0x30 0x0b 0x58 0xee
+
+# CHECK:	vmov	r1, r0, d11
+0x1b 0x1b 0x50 0xec
+
+# CHECK:	usada8mi	r8, r9, r5, r9
+0x19 0x95 0x88 0x47
+
+# CHECK:	vext.8	q4, q2, q1, #4
+0x42 0x84 0xb4 0xf2
diff --git a/test/MC/Disassembler/ARM/thumb-printf.txt b/test/MC/Disassembler/ARM/thumb-printf.txt
new file mode 100644
index 0000000..6c2c500
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-printf.txt
@@ -0,0 +1,77 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
+
+# CHECK:	push	{r0, r1, r2, r3}
+# CHECK-NEXT:	push	{r4, r5, r7, lr}
+# CHECK-NEXT:	add	r7, sp, #8
+# CHECK-NEXT:	sub	sp, #4
+# CHECK-NEXT:	add	r3, sp, #20
+# CHECK-NEXT:	ldr	r5, [r3], #4
+# CHECK-NEXT:	str	r3, [sp]
+# CHECK-NEXT:	ldr	r3, #52
+# CHECK-NEXT:	add	r3, pc
+# CHECK-NEXT:	ldr	r0, [r3]
+# CHECK-NEXT:	ldr	r4, [r0]
+# CHECK-NEXT:	ldr	r0, #48
+# CHECK-NEXT:	add	r0, pc
+# CHECK-NEXT:	ldr	r0, [r0]
+# CHECK-NEXT:	ldr	r0, [r0]
+# CHECK-NEXT:	blx	#191548
+# CHECK-NEXT:	cbnz	r0, #6
+# CHECK-NEXT:	ldr	r1, #40
+# CHECK-NEXT:	add	r1, pc
+# CHECK-NEXT:	ldr	r1, [r1]
+# CHECK-NEXT:	b	#0
+# CHECK-NEXT:	mov	r1, r0
+# CHECK-NEXT:	mov	r0, r4
+# CHECK-NEXT:	mov	r2, r5
+# CHECK-NEXT:	ldr	r3, [sp]
+# CHECK-NEXT:	bl	#-8390
+# Data bytes (corresponds to an invalid instruction)
+# But not:	sub.w	sp, r7, #8
+# CHECK-NEXT:	pop.w	{r4, r5, r7, lr}
+# CHECK-NEXT:	add	sp, #16
+# CHECK-NEXT:	bx	lr
+# CHECK-NEXT:	nop
+# CHECK-NEXT:	movs	r3, #142
+# CHECK-NEXT:	movs	r5, r0
+# CHECK-NEXT:	adds	r1, #122
+# CHECK-NEXT:	movs	r5, r0
+# CHECK-NEXT:	adds	r1, #104
+# CHECK-NEXT:	movs	r5, r0
+0x0f	0xb4
+0xb0	0xb5
+0x02	0xaf
+0x81	0xb0
+0x05	0xab
+0x53	0xf8	0x04	0x5b
+0x00	0x93
+0x0d	0x4b
+0x7b	0x44
+0x18	0x68
+0x04	0x68
+0x0c	0x48
+0x78	0x44
+0x00	0x68
+0x00	0x68
+0x2e	0xf0	0x1e	0xee
+0x18	0xb9
+0x0a	0x49
+0x79	0x44
+0x09	0x68
+0x00	0xe0
+0x01	0x46
+0x20	0x46
+0x2a	0x46
+0x00	0x9b
+0xfd	0xf7	0x9d	0xff
+# 0xa7	0xf1	0x08	0x0d
+0xbd	0xe8	0xb0	0x40
+0x04	0xb0
+0x70	0x47
+0x00	0xbf
+0x8e	0x23
+0x05	0x00
+0x7a	0x31
+0x05	0x00
+0x68	0x31
+0x05	0x00
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index 6dab123..774dbe4 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -6,11 +6,14 @@
 # CHECK:	adcs	r0, r0, #1
 0x50 0xf1 0x01 0x00
 
-# CHECK:	b	#34
+# CHECK:	b	#30
 0x0f 0xe0
 
-# CHECK:	b.w	#-12
-0xff 0xf7 0xf8 0xaf
+# CHECK:	bgt.w	#-16
+0x3f 0xf7 0xf8 0xaf
+
+# CHECK:	bfc	r0, #10, #10
+0x6f 0xf3 0x93 0x20
 
 # CHECK:	bfi	r2, r10, #0, #1
 0x6a 0xf3 0x00 0x02
@@ -27,14 +30,26 @@
 # CHECK:	ldmia	r0!, {r1}
 0x02 0xc8
 
+# CHECK:	ldr	r5, #432
+0x6c 0x4d
+
+# CHECK:	str	r0, [r3]
+0x18 0x60
+
+# CHECK:	str	r0, [r3, #4]
+0x58 0x60
+
+# CHECK:	str	r2, [r5, r3]
+0xea 0x50
+
 # CHECK:	ldrb.w	r8, #-24
 0x1f 0xf8 0x18 0x80
 
 # CHECK:	ldrd	r0, r1, [r7, #64]!
 0xf7 0xe9 0x10 0x01
 
-# CHECK:	lsls.w	r0, pc, #1
-0x5f 0xea 0x4f 0x00
+# CHECK:	lsls.w	r0, r5, #1
+0x5f 0xea 0x45 0x00
 
 # CHECK:	mov	r11, r7
 0xbb 0x46
@@ -118,3 +133,132 @@
 
 # CHECK: msr cpsr_fc, r0
 0x80 0xf3 0x00 0x89
+
+# CHECK: blx	#-4
+0xff 0xf7 0xfe 0xef
+
+# CHECK:	vpush	{d8, d9, d10}
+0x2d 0xed 0x06 0x8b
+
+# CHECK:	vcmpe.f64	d8, #0
+0xb5 0xee 0xc0 0x8b
+
+# CHECK:	stmdb.w	sp, {r0, r2, r3, r8, r11, lr}
+0x0d 0xe9 0x0d 0x49
+
+# CHECK:	stmia	r5!, {r0, r1, r2, r3, r4}
+0x1f 0xc5
+
+# CHECK:	ldmia	r5, {r0, r1, r2, r3, r4, r5}
+0x3f 0xcd
+
+# CHECK:	ldmia	r5!, {r0, r1, r2, r3, r4}
+0x1f 0xcd
+
+# CHECK:	addw	r0, pc, #1050
+0x0f 0xf2 0x1a 0x40
+
+# CHECK:	ldrd	r3, r8, [r11, #-60]
+0x5b 0xe9 0x0f 0x38
+
+# CHECK:	ldrex	r8, [r2]
+0x52 0xe8 0x00 0x8f
+
+# CHECK:	ldrexd	r8, r9, [r2]
+0xd2 0xe8 0x7f 0x89
+
+# CHECK:	strexd	r1, r7, r8, [r2]
+0xc2 0xe8 0x71 0x78
+
+# CHECK:	tbh	[r5, r4, lsl #1]
+0xd5 0xe8 0x14 0xf0
+
+# CHECK:	tbb	[r5, r4]
+0xd5 0xe8 0x04 0xf0
+
+# CHECK:	ldr.w	r4, [sp, r4, lsl #3]
+0x5d 0xf8 0x34 0x40
+
+# CHECK:	ldr.w	r5, [r6, #30]
+0xd6 0xf8 0x1e 0x50
+
+# CHECK:	ldrh.w	r5, [r6, #30]
+0xb6 0xf8 0x1e 0x50
+
+# CHECK:	ldrt	r5, [r6, #30]
+0x56 0xf8 0x1e 0x5e
+
+# CHECK:	ldr	r5, [r6, #-30]
+0x56 0xf8 0x1e 0x5c
+
+# CHECK:	sel	r7, r3, r5
+0xa3 0xfa 0x85 0xf7
+
+# CHECK:	lsl.w	r7, r3, r5
+0x03 0xfa 0x05 0xf7
+
+# CHECK:	adds.w	r7, r3, r5
+0x13 0xeb 0x05 0x07
+
+# CHECK:	smlabt	r4, r3, r2, r1
+0x13 0xfb 0x12 0x14
+
+# CHECK:	smmulr	r7, r8, r9
+0x58 0xfb 0x19 0xf7
+
+# CHECK:	umull	r1, r2, r3, r4
+0xa3 0xfb 0x04 0x12
+
+# CHECK:	pld	[r5, r0, lsl #1]
+0x15 0xf8 0x10 0xf0
+
+# CHECK:	pld	[pc, #-16]
+0x1f 0xf8 0x10 0xf0
+
+# CHECK:	pld	[r5, #30]
+0x95 0xf8 0x1e 0xf0
+
+# CHECK:	stc2	p12, cr15, [r9], {137}
+0x89 0xfc 0x89 0xfc
+
+# CHECK:	vmov	r1, r0, d11
+0x50 0xec 0x1b 0x1b
+
+# CHECK:	dsb	nsh
+0xbf 0xf3 0x47 0x8f
+
+# CHECK:	isb
+0xbf 0xf3 0x6f 0x8f
+
+# CHECK:	asrs	r1, r0, #32
+0x1 0x10
+
+# CHECK:	lsr.w	r10, r0, #32
+0x4f 0xea 0x10 0x0a
+
+# CHECK:	blx	sp
+0xe8 0x47
+
+# CHECK:	bx	lr
+0x70 0x47
+
+# CHECK:	bx	pc
+0x78 0x47
+
+# CHECK:	svc	#230
+0xe6 0xdf
+
+# CHECK:	rfedb	lr
+0x1e 0xe8 0x00 0xc0
+
+# CHECK:	mov.w	r3, #4294967295
+0x4f 0xf0 0xff 0x33
+
+# CHECK:	mov	pc, sp
+0xef 0x46
+
+# CHECK:	nop
+0x00 0xbf
+
+# CHECK:	nop.w
+0xaf 0xf3 0x00 0x80
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 13a19d2..08fb4c5 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -66,3 +66,9 @@
 
 # CHECK: movw	$47416, -66(%rbp)
 0x66 0xc7 0x45 0xbe 0x38 0xb9
+
+# CHECK: vaddpd	%ymm13, %ymm1, %ymm0
+0xc4 0xc1 0x75 0x58 0xc5
+
+# CHECK: vaddps	%ymm3, %ymm1, %ymm0
+0xc5 0xf4 0x58 0xc3
diff --git a/test/MC/ELF/alias-reloc.s b/test/MC/ELF/alias-reloc.s
index c908c12..67266d6 100644
--- a/test/MC/ELF/alias-reloc.s
+++ b/test/MC/ELF/alias-reloc.s
@@ -17,6 +17,20 @@ foo2:
     .set    bar2,foo2
     .quad    bar2
 
+// CHECK:       # Relocation 0x00000000
+// CHECK-NEXT:  (('r_offset', 0x00000001)
+// CHECK-NEXT:   ('r_sym', 0x00000001)
+// CHECK-NEXT:   ('r_type', 0x00000004)
+// CHECK-NEXT:   ('r_addend', 0xfffffffc)
+// CHECK-NEXT:  ),
+
+// CHECK:      # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000005)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000001)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+
 // CHECK:       # Symbol 0x00000001
 // CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
 // CHECK-NEXT:   ('st_bind', 0x00000000)
@@ -36,17 +50,3 @@ foo2:
 // CHECK-NEXT:  ('st_value', 0x0000000000000005)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
-
-// CHECK:       # Relocation 0x00000000
-// CHECK-NEXT:  (('r_offset', 0x00000001)
-// CHECK-NEXT:   ('r_sym', 0x00000001)
-// CHECK-NEXT:   ('r_type', 0x00000004)
-// CHECK-NEXT:   ('r_addend', 0xfffffffc)
-// CHECK-NEXT:  ),
-
-// CHECK:      # Relocation 0x00000001
-// CHECK-NEXT: (('r_offset', 0x00000005)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000001)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
diff --git a/test/MC/ELF/align-bss.s b/test/MC/ELF/align-bss.s
index 4f73a29..ca6da91 100644
--- a/test/MC/ELF/align-bss.s
+++ b/test/MC/ELF/align-bss.s
@@ -5,7 +5,7 @@
 	.local	foo
 	.comm	foo,2048,16
 
-// CHECK:        ('sh_name', 0x0000000d) # '.bss'
+// CHECK:        ('sh_name', 0x00000007) # '.bss'
 // CHECK-NEXT:   ('sh_type', 0x00000008)
 // CHECK-NEXT:   ('sh_flags', 0x00000003)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/align-nops.s b/test/MC/ELF/align-nops.s
index 28d4b89..d29cb5b 100644
--- a/test/MC/ELF/align-nops.s
+++ b/test/MC/ELF/align-nops.s
@@ -27,7 +27,7 @@ f0:
 // CHECK-NEXT:  ('sh_entsize', 0x00000000)
 // CHECK-NEXT:  ('_section_data', '00000000 0f1f4000 00000000 0f1f4000')
 
-// CHECK: (('sh_name', 0x00000007) # '.data'
+// CHECK: (('sh_name', 0x00000026) # '.data'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000003)
 // CHECK-NEXT:  ('sh_addr',
diff --git a/test/MC/ELF/align.s b/test/MC/ELF/align.s
index c3912a7..c1e7086 100644
--- a/test/MC/ELF/align.s
+++ b/test/MC/ELF/align.s
@@ -8,7 +8,7 @@
 	.align	8
 
 // CHECK: # Section 0x00000003
-// CHECK-NEXT:  (('sh_name', 0x0000000d) # '.bss'
+// CHECK-NEXT:  (('sh_name', 0x00000007) # '.bss'
 // CHECK-NEXT:   ('sh_type', 0x00000008)
 // CHECK-NEXT:   ('sh_flags', 0x00000003)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -20,7 +20,7 @@
 // CHECK-NEXT:   ('sh_entsize', 0x00000000)
 // CHECK-NEXT:  ),
 // CHECK-NEXT:  # Section 0x00000004
-// CHECK-NEXT:  (('sh_name', 0x00000012) # '.rodata'
+// CHECK-NEXT:  (('sh_name', 0x00000026) # '.rodata'
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000002)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index fa97da4..7c2927a 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -39,23 +39,6 @@ main:                                   # @main
 
 // CHECK:   # '.text'
 
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK:   # 'main'
-// CHECK:   ('st_bind', 0x00000001)
-// CHECK-NEXT: ('st_type', 0x00000002)
-
-// CHECK:   # 'puts'
-// CHECK:   ('st_bind', 0x00000001)
-// CHECK-NEXT: ('st_type', 0x00000000)
-
 // CHECK:   # '.rel.text'
 
 // CHECK:   ('_relocations', [
@@ -76,3 +59,20 @@ main:                                   # @main
 // CHECK:      ('r_type', 0x00000002)
 // CHECK:     ),
 // CHECK:   ])
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK:   # 'main'
+// CHECK:   ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000002)
+
+// CHECK:   # 'puts'
+// CHECK:   ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000000)
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index 7fc40b7..5ae1f45 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -39,23 +39,6 @@ main:                                   # @main
 
 // CHECK:   # '.text'
 
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK: ('st_bind', 0x00000000)
-// CHECK: ('st_type', 0x00000003)
-
-// CHECK:   # 'main'
-// CHECK-NEXT: ('st_bind', 0x00000001)
-// CHECK-NEXT: ('st_type', 0x00000002)
-
-// CHECK:   # 'puts'
-// CHECK-NEXT: ('st_bind', 0x00000001)
-// CHECK-NEXT: ('st_type', 0x00000000)
-
 // CHECK:   # '.rela.text'
 
 // CHECK:   ('_relocations', [
@@ -80,3 +63,20 @@ main:                                   # @main
 // CHECK:      ('r_addend', 0xfffffffc)
 // CHECK:     ),
 // CHECK:   ])
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK:   # 'main'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000002)
+
+// CHECK:   # 'puts'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000000)
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
new file mode 100644
index 0000000..5c1a9f9
--- /dev/null
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+        nop
+        .cfi_adjust_cfa_offset 4
+	addq	$8, %rsp
+	.cfi_def_cfa_offset 8
+	ret
+	.cfi_endproc
+
+// CHECK:       # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000050)
+// CHECK-NEXT:   ('sh_size', 0x00000038)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 0a000000 00440e10 410e1444 0e080000 00000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000005
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x000003a0)
+// CHECK-NEXT:   ('sh_size', 0x00000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x00000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index 3ffdd6c..163e810 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -10,7 +10,7 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -24,13 +24,13 @@ f:
 // CHECK-NEXT: ),
 
 
-// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000258)
+// CHECK-NEXT:  ('sh_offset', 0x00000490)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index efefb87..124d02e 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -11,7 +11,7 @@ f:
 	.cfi_endproc
 
 // CHECK:      # Section 0x00000004
-// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -24,14 +24,14 @@ f:
 // CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 0a000000 00440e10 450e0800')
 // CHECK-NEXT: ),
 
-// CHECK:       # Section 0x00000008
-// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:       # Section 0x00000005
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000160)
+// CHECK-NEXT:  ('sh_offset', 0x00000398)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index 3df2021..75311e2 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -7,7 +7,7 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -20,13 +20,13 @@ f:
 // CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410d06 00000000')
 // CHECK-NEXT: ),
 
-// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_offset', 0x00000390)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index 1ad427b..53a169c 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -7,7 +7,7 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -21,13 +21,13 @@ f:
 // CHECK-NEXT: ),
 
 
-// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_offset', 0x00000390)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 2f7e797..963a76c 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -7,7 +7,7 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -21,13 +21,13 @@ f:
 // CHECK-NEXT: ),
 
 
-// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_offset', 0x00000390)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
new file mode 100644
index 0000000..87c0cf3
--- /dev/null
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -0,0 +1,49 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+        .cfi_def_cfa_offset 8
+        nop
+        .cfi_def_cfa_register 6
+        nop
+        .cfi_rel_offset 6,16
+        nop
+        .cfi_def_cfa_offset 16
+        nop
+        .cfi_rel_offset 6,0
+	.cfi_endproc
+
+// CHECK:       # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000048)
+// CHECK-NEXT:   ('sh_size', 0x00000040)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 24000000 1c000000 00000000 05000000 00410e08 410d0641 11067f41 0e104186 02000000 00000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000005
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x000003a0)
+// CHECK-NEXT:   ('sh_size', 0x00000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x00000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
new file mode 100644
index 0000000..f14beaf
--- /dev/null
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+        .cfi_rel_offset 6,16
+	.cfi_endproc
+
+// CHECK:       # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000048)
+// CHECK-NEXT:   ('sh_size', 0x00000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01000000 00411106 7f000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000005
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000390)
+// CHECK-NEXT:   ('sh_size', 0x00000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x00000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index b5b3803..814812e 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -10,7 +10,7 @@ f:
 	.cfi_endproc
 
 // CHECK:      # Section 0x00000004
-// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -23,14 +23,14 @@ f:
 // CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 03000000 00410a41 0b000000')
 // CHECK-NEXT: ),
 
-// CHECK:      # Section 0x00000008
-// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      # Section 0x00000005
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_offset', 0x00000390)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
new file mode 100644
index 0000000..eab1ae4
--- /dev/null
+++ b/test/MC/ELF/cfi-same-value.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+        .cfi_same_value 6
+        nop
+	.cfi_endproc
+
+// CHECK:       # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000048)
+// CHECK-NEXT:   ('sh_size', 0x00000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410806 00000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000005
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000390)
+// CHECK-NEXT:   ('sh_size', 0x00000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x00000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 5585e29..3ddf69e 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -14,7 +14,7 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -27,13 +27,13 @@ f:
 // CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 04000000 00410e10 410a0e08 410b0000 00000000')
 // CHECK-NEXT: ),
 
-// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000160)
+// CHECK-NEXT:  ('sh_offset', 0x00000398)
 // CHECK-NEXT:  ('sh_size', 0x00000018)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 93fd2e7..133b858 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -213,7 +213,7 @@ f36:
         .cfi_endproc
 
 // CHECK:      # Section 0x00000004
-// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -226,14 +226,14 @@ f36:
 // CHECK-NEXT:  ('_section_data', '14000000 00000000 017a4c52 00017810 02031b0c 07089001 14000000 1c000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000003 1b0c0708 90010000 14000000 28000000 00000000 01000000 04000000 00000000 14000000 70000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000002 1b0c0708 90010000 10000000 28000000 00000000 01000000 02000000 18000000 00000000 017a5052 00017810 04020000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06030000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a040000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 040a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 060b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a0c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a080000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a100000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04120000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06130000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a140000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 041a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 061b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a1c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a180000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a800000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04820000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06830000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a840000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 048a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 068b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a8c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a880000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a900000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04920000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06930000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a940000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 049a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 069b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a9c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a980000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000')
 // CHECK-NEXT: ),
 
-// CHECK:        # Section 0x00000008
-// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK:        # Section 0x00000005
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000bf8)
+// CHECK-NEXT:  ('sh_offset', 0x00000e30)
 // CHECK-NEXT:  ('sh_size', 0x000006c0)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_link', 0x00000007)
 // CHECK-NEXT:  ('sh_info', 0x00000004)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index 0f1164e..be7128b 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -4,37 +4,37 @@
 // of the file.
 
 // CHECK:       # Section 0x00000001
-// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
 // CHECK-NEXT:   ('sh_type', 0x00000011)
 // CHECK-NEXT:   ('sh_flags', 0x00000000)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
 // CHECK-NEXT:   ('sh_offset', 0x00000040)
 // CHECK-NEXT:   ('sh_size', 0x0000000c)
-// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_link', 0x0000000d)
 // CHECK-NEXT:   ('sh_info', 0x00000001)
 // CHECK-NEXT:   ('sh_addralign', 0x00000004)
 // CHECK-NEXT:   ('sh_entsize', 0x00000004)
 // CHECK-NEXT:  ),
 // CHECK-NEXT:  # Section 0x00000002
-// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
 // CHECK-NEXT:   ('sh_type', 0x00000011)
 // CHECK-NEXT:   ('sh_flags', 0x00000000)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
 // CHECK-NEXT:   ('sh_offset', 0x0000004c)
 // CHECK-NEXT:   ('sh_size', 0x00000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_link', 0x0000000d)
 // CHECK-NEXT:   ('sh_info', 0x00000002)
 // CHECK-NEXT:   ('sh_addralign', 0x00000004)
 // CHECK-NEXT:   ('sh_entsize', 0x00000004)
 // CHECK-NEXT:  ),
 // CHECK-NEXT:  # Section 0x00000003
-// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
 // CHECK-NEXT:   ('sh_type', 0x00000011)
 // CHECK-NEXT:   ('sh_flags', 0x00000000)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
 // CHECK-NEXT:   ('sh_offset', 0x00000054)
 // CHECK-NEXT:   ('sh_size', 0x00000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_link', 0x0000000d)
 // CHECK-NEXT:   ('sh_info', 0x0000000d)
 // CHECK-NEXT:   ('sh_addralign', 0x00000004)
 // CHECK-NEXT:   ('sh_entsize', 0x00000004)
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
index 16b677b..70e2ed2 100644
--- a/test/MC/ELF/common.s
+++ b/test/MC/ELF/common.s
@@ -38,7 +38,7 @@
 // CHECK-NEXT:  ('st_bind', 0x00000000)
 // CHECK-NEXT:  ('st_type', 0x00000001)
 // CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000003)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
 // CHECK-NEXT:  ('st_value', 0x0000000000000010)
 // CHECK-NEXT:  ('st_size', 0x0000000000000008)
 // CHECK-NEXT: ),
diff --git a/test/MC/ELF/common2.s b/test/MC/ELF/common2.s
index b54cdfe..b48ca66 100644
--- a/test/MC/ELF/common2.s
+++ b/test/MC/ELF/common2.s
@@ -9,7 +9,7 @@
         .zero 1
 	.align	8
 
-// CHECK:      (('sh_name', 0x0000000d) # '.bss'
+// CHECK:      (('sh_name', 0x00000007) # '.bss'
 // CHECK-NEXT:  ('sh_type',
 // CHECK-NEXT:  ('sh_flags'
 // CHECK-NEXT:  ('sh_addr',
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
index 2979ca2..aa0197d 100644
--- a/test/MC/ELF/debug-line.s
+++ b/test/MC/ELF/debug-line.s
@@ -2,7 +2,7 @@
 
 // Test that .debug_line is populated.
 
-// CHECK:     (('sh_name', 0x00000012) # '.debug_line'
+// CHECK:     (('sh_name', 0x00000011) # '.debug_line'
 // CHECK-NEXT: ('sh_type', 0x00000001)
 // CHECK-NEXT: ('sh_flags', 0x00000000)
 // CHECK-NEXT: ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/debug-loc.s b/test/MC/ELF/debug-loc.s
index 36ae485..68e36a5 100644
--- a/test/MC/ELF/debug-loc.s
+++ b/test/MC/ELF/debug-loc.s
@@ -8,7 +8,7 @@
 // FIXME2: We need a debug_line dumper so that we can test the actual contents.
 
 // CHECK:      # Section 0x00000004
-// CHECK-NEXT: (('sh_name', 0x00000012) # '.debug_line'
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.debug_line'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/empty-dwarf-lines.s b/test/MC/ELF/empty-dwarf-lines.s
index 0f791ae..1b135a6 100644
--- a/test/MC/ELF/empty-dwarf-lines.s
+++ b/test/MC/ELF/empty-dwarf-lines.s
@@ -8,7 +8,7 @@ c:
         .asciz   "hi\n"
 
 // CHECK:      # Section 0x00000004
-// CHECK-NEXT: (('sh_name', 0x00000012) # '.debug_line'
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.debug_line'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
index e351936..d98f0c6 100644
--- a/test/MC/ELF/empty.s
+++ b/test/MC/ELF/empty.s
@@ -14,7 +14,7 @@
 // CHECK-NEXT: ('sh_addralign', 0x00000004)
 // CHECK-NEXT: ('sh_entsize', 0x00000000)
 
-// CHECK:      ('sh_name', 0x00000007) # '.data'
+// CHECK:      ('sh_name', 0x00000026) # '.data'
 // CHECK-NEXT: ('sh_type', 0x00000001)
 // CHECK-NEXT: ('sh_flags', 0x00000003)
 // CHECK-NEXT: ('sh_addr', 0x00000000)
@@ -25,7 +25,7 @@
 // CHECK-NEXT: ('sh_addralign', 0x00000004)
 // CHECK-NEXT: ('sh_entsize', 0x00000000)
 
-// CHECK:      ('sh_name', 0x0000000d) # '.bss'
+// CHECK:      ('sh_name', 0x00000007) # '.bss'
 // CHECK-NEXT: ('sh_type', 0x00000008)
 // CHECK-NEXT: ('sh_flags', 0x00000003)
 // CHECK-NEXT: ('sh_addr', 0x00000000)
@@ -36,7 +36,7 @@
 // CHECK-NEXT: ('sh_addralign', 0x00000004)
 // CHECK-NEXT: ('sh_entsize', 0x00000000)
 
-// CHECK:      ('sh_name', 0x00000012) # '.shstrtab'
+// CHECK:      ('sh_name', 0x0000000c) # '.shstrtab'
 // CHECK-NEXT: ('sh_type', 0x00000003)
 // CHECK-NEXT:    ('sh_flags', 0x00000000)
 // CHECK-NEXT:    ('sh_addr', 0x00000000)
@@ -47,7 +47,7 @@
 // CHECK-NEXT:    ('sh_addralign', 0x00000001)
 // CHECK-NEXT:    ('sh_entsize', 0x00000000)
 
-// CHECK: ('sh_name', 0x0000001c) # '.symtab'
+// CHECK: ('sh_name', 0x0000001e) # '.symtab'
 // CHECK-NEXT:    ('sh_type', 0x00000002)
 // CHECK-NEXT:    ('sh_flags', 0x00000000)
 // CHECK-NEXT:    ('sh_addr', 0x00000000)
@@ -58,7 +58,7 @@
 // CHECK-NEXT:    ('sh_addralign', 0x00000008)
 // CHECK-NEXT:    ('sh_entsize', 0x00000018)
 
-// CHECK: ('sh_name', 0x00000024) # '.strtab'
+// CHECK: ('sh_name', 0x00000016) # '.strtab'
 // CHECK-NEXT:    ('sh_type', 0x00000003)
 // CHECK-NEXT:    ('sh_flags', 0x00000000)
 // CHECK-NEXT:    ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/entsize.ll b/test/MC/ELF/entsize.ll
index 21179df..74f0413 100644
--- a/test/MC/ELF/entsize.ll
+++ b/test/MC/ELF/entsize.ll
@@ -20,7 +20,7 @@ declare void @foo(i64* nocapture) nounwind
 
 ;;;;;
 
-; 64: (('sh_name', 0x00000012) # '.rodata.str1.1'
+; 64: (('sh_name', 0x0000004e) # '.rodata.str1.1'
 ; 64-NEXT:   ('sh_type', 0x00000001)
 ; 64-NEXT:   ('sh_flags', 0x00000032)
 ; 64-NEXT:   ('sh_addr',
@@ -31,7 +31,7 @@ declare void @foo(i64* nocapture) nounwind
 ; 64-NEXT:   ('sh_addralign', 0x00000001)
 ; 64-NEXT:   ('sh_entsize', 0x00000001)
 
-; 64: (('sh_name', 0x00000021) # '.rodata.cst8'
+; 64: (('sh_name', 0x00000041) # '.rodata.cst8'
 ; 64-NEXT:   ('sh_type', 0x00000001)
 ; 64-NEXT:   ('sh_flags', 0x00000012)
 ; 64-NEXT:   ('sh_addr',
diff --git a/test/MC/ELF/entsize.s b/test/MC/ELF/entsize.s
index e8eb62e..415eaef 100644
--- a/test/MC/ELF/entsize.s
+++ b/test/MC/ELF/entsize.s
@@ -33,7 +33,7 @@
     .quad 42
 
 // CHECK: # Section 0x00000004
-// CHECK-NEXT:   ('sh_name', 0x00000012) # '.rodata.str1.1'
+// CHECK-NEXT:   ('sh_name', 0x00000048) # '.rodata.str1.1'
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000032)
 // CHECK-NEXT:   ('sh_addr',
@@ -45,7 +45,7 @@
 // CHECK-NEXT:   ('sh_entsize', 0x00000001)
 
 // CHECK: # Section 0x00000005
-// CHECK-NEXT:   ('sh_name', 0x00000021) # '.rodata.str2.1'
+// CHECK-NEXT:   ('sh_name', 0x00000039) # '.rodata.str2.1'
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000032)
 // CHECK-NEXT:   ('sh_addr',
@@ -57,7 +57,7 @@
 // CHECK-NEXT:   ('sh_entsize', 0x00000002)
 
 // CHECK: # Section 0x00000006
-// CHECK-NEXT:   ('sh_name', 0x00000030) # '.rodata.cst8
+// CHECK-NEXT:   ('sh_name', 0x0000002c) # '.rodata.cst8
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000012)
 // CHECK-NEXT:   ('sh_addr',
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index aa63287..77f7267 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -5,7 +5,7 @@
 
         addl    $_GLOBAL_OFFSET_TABLE_, %ebx
 
-// CHECK:      ('sh_name', 0x00000001) # '.text'
+// CHECK:      ('sh_name', 0x00000005) # '.text'
 // CHECK-NEXT: ('sh_type',
 // CHECK-NEXT: ('sh_flags',
 // CHECK-NEXT: ('sh_addr',
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
index 798150e..a3abef5 100644
--- a/test/MC/ELF/got.s
+++ b/test/MC/ELF/got.s
@@ -6,9 +6,6 @@
         movl	foo@GOT, %eax
         movl	foo@GOTPCREL(%rip), %eax
 
-// CHECK:     (('st_name', 0x00000005) # '_GLOBAL_OFFSET_TABLE_'
-// CHECK-NEXT: ('st_bind', 0x00000001)
-
 // CHECK:      ('_relocations', [
 // CHECK-NEXT:   # Relocation 0x00000000
 // CHECK-NEXT:    (('r_offset',
@@ -23,3 +20,6 @@
 // CHECK-NEXT:     ('r_addend',
 // CHECK-NEXT:    ),
 // CHECK-NEXT:   ])
+
+// CHECK:     (('st_name', 0x00000005) # '_GLOBAL_OFFSET_TABLE_'
+// CHECK-NEXT: ('st_bind', 0x00000001)
diff --git a/test/MC/ELF/ident.s b/test/MC/ELF/ident.s
index f79458f..b364d60 100644
--- a/test/MC/ELF/ident.s
+++ b/test/MC/ELF/ident.s
@@ -1,6 +1,6 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
 
-// CHECK:       (('sh_name', 0x00000012) # '.comment'
+// CHECK:       (('sh_name', 0x00000007) # '.comment'
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000030)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
index c2b4771..8384e0e 100644
--- a/test/MC/ELF/local-reloc.s
+++ b/test/MC/ELF/local-reloc.s
@@ -10,16 +10,6 @@ foo:
 // CHECK:        # Section 0x00000001
 // CHECK-next:  (('sh_name', 0x00000001) # '.text'
 
-// Symbol number 2 is section number 1
-// CHECK:    # Symbol 0x00000002
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x00000000)
-// CHECK-NEXT:     ('st_type', 0x00000003)
-// CHECK-NEXT:     ('st_other', 0x00000000)
-// CHECK-NEXT:     ('st_shndx', 0x00000001)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-
 // Relocation refers to symbol number 2
 // CHECK:      ('_relocations', [
 // CHECK-NEXT:  # Relocation 0x00000000
@@ -29,3 +19,13 @@ foo:
 // CHECK-NEXT:    ('r_addend',
 // CHECK-NEXT:   ),
 // CHECK-NEXT:  ])
+
+// Symbol number 2 is section number 1
+// CHECK:    # Symbol 0x00000002
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000003)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000001)
+// CHECK-NEXT:     ('st_value', 0x0000000000000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
index ec02228..befc2bf 100644
--- a/test/MC/ELF/merge.s
+++ b/test/MC/ELF/merge.s
@@ -22,30 +22,6 @@ zed:
         .section	bar,"ax",@progbits
 foo:
 
-// Section 4 is "sec1"
-// CHECK: # Section 0x00000004
-// CHECK-NEXT:  (('sh_name', 0x00000012) # '.sec1'
-
-// Symbol number 1 is .Lfoo
-// CHECK:      # Symbol 0x00000001
-// CHECK-NEXT: (('st_name', 0x00000001) # '.Lfoo'
-
-// Symbol number 2 is foo
-// CHECK:      # Symbol 0x00000002
-// CHECK-NEXT: (('st_name', 0x00000007) # 'foo'
-
-// Symbol number 6 is section 4
-// CHECK:        # Symbol 0x00000006
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x00000000)
-// CHECK-NEXT:     ('st_type', 0x00000003)
-// CHECK-NEXT:     ('st_other', 0x00000000)
-// CHECK-NEXT:     ('st_shndx', 0x00000004)
-
-// Symbol number 8 is zed
-// CHECK:        # Symbol 0x00000008
-// CHECK-NEXT:    (('st_name', 0x0000000b) # 'zed'
-
 // Relocation 0 refers to symbol 1
 // CHECK:       ('_relocations', [
 // CHECK-NEXT:   # Relocation 0
@@ -95,3 +71,27 @@ foo:
 // CHECK-NEXT:    ('r_addend', 0x00000000)
 // CHECK-NEXT:   ),
 // CHECK-NEXT:  ])
+
+// Section 5 is "sec1"
+// CHECK: # Section 0x00000005
+// CHECK-NEXT:  (('sh_name', 0x00000035) # '.sec1'
+
+// Symbol number 1 is .Lfoo
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000001) # '.Lfoo'
+
+// Symbol number 2 is foo
+// CHECK:      # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x00000007) # 'foo'
+
+// Symbol number 6 is section 5
+// CHECK:        # Symbol 0x00000006
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000003)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000005)
+
+// Symbol number 8 is zed
+// CHECK:        # Symbol 0x00000008
+// CHECK-NEXT:    (('st_name', 0x0000000b) # 'zed'
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
index 87b6f3a..c4b7d98 100644
--- a/test/MC/ELF/noexec.s
+++ b/test/MC/ELF/noexec.s
@@ -1,7 +1,7 @@
 // RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck  %s
 
 // CHECK:       # Section 0x00000004
-// CHECK-NEXT:  (('sh_name', 0x00000012) # '.note.GNU-stack'
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.note.GNU-stack'
 // CHECK-NEXT:   ('sh_type', 0x00000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000000)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
diff --git a/test/MC/ELF/pic-diff.s b/test/MC/ELF/pic-diff.s
index d1fc909..c2f96c2 100644
--- a/test/MC/ELF/pic-diff.s
+++ b/test/MC/ELF/pic-diff.s
@@ -1,5 +1,14 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
 
+// CHECK:       ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x0000000c)
+// CHECK-NEXT:     ('r_sym', 0x00000005)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000008)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+
 // CHECK:         # Symbol 0x00000005
 // CHECK-NEXT:    (('st_name', 0x00000005) # 'baz'
 // CHECK-NEXT:     ('st_bind', 0x00000001)
@@ -10,15 +19,6 @@
 // CHECK-NEXT:     ('st_size', 0x0000000000000000)
 // CHECK-NEXT:    ),
 
-// CHECK:       ('_relocations', [
-// CHECK-NEXT:    # Relocation 0x00000000
-// CHECK-NEXT:    (('r_offset', 0x0000000c)
-// CHECK-NEXT:     ('r_sym', 0x00000005)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x00000008)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-
 .zero 4
 .data
 
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index f106f89..f7b20b5 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -3,33 +3,6 @@
 // Test that we produce the correct relocation types and that the relocations
 // correctly point to the section or the symbol.
 
-// Section 3 is bss
-// CHECK:      # Section 0x00000003
-// CHECK-NEXT: (('sh_name', 0x0000000d) # '.bss'
-
-// CHECK:      # Symbol 0x00000001
-// CHECK-NEXT: (('st_name', 0x00000005) # '.Lfoo'
-
-// Symbol 4 is zed
-// CHECK:      # Symbol 0x00000004
-// CHECK-NEXT: (('st_name', 0x00000035) # 'zed'
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x00000000)
-// CHECK-NEXT:  ('st_type', 0x00000006)
-// CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000004)
-
-// Symbol 7 is section 3
-// CHECK:      # Symbol 0x00000007
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x00000000)
-// CHECK-NEXT:  ('st_type', 0x00000003)
-// CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000003)
-
 // CHECK:      # Relocation 0x00000000
 // CHECK-NEXT: (('r_offset', 0x00000002)
 // CHECK-NEXT:  ('r_sym', 0x00000001)
@@ -181,6 +154,34 @@
 // CHECK-NEXT:  ('r_type', 0x00000001)
 // CHECK-NEXT: ),
 
+// Section 4 is bss
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x0000000b) # '.bss'
+
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000005) # '.Lfoo'
+
+// Symbol 4 is zed
+// CHECK:      # Symbol 0x00000004
+// CHECK-NEXT: (('st_name', 0x00000035) # 'zed'
+// CHECK-NEXT:  ('st_value', 0x00000000)
+// CHECK-NEXT:  ('st_size', 0x00000000)
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000006)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000005)
+
+// Symbol 7 is section 4
+// CHECK:      # Symbol 0x00000007
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_value', 0x00000000)
+// CHECK-NEXT:  ('st_size', 0x00000000)
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
+
+
         .text
 bar:
 	leal	.Lfoo@GOTOFF(%ebx), %eax
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index 58c5f41..36212cb 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -5,14 +5,14 @@
 	loope	0                 # R_X86_64_PC8
 	jmp	-256              # R_X86_64_PC32
 
-// CHECK:      # Section 0x00000007
-// CHECK-NEXT: (('sh_name', 0x0000002c) # '.rela.text'
+// CHECK:      # Section 0x00000002
+// CHECK-NEXT: (('sh_name', 0x00000001) # '.rela.text'
 // CHECK-NEXT:  ('sh_type', 0x00000004)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
-// CHECK-NEXT:  ('sh_offset', 0x000000e8)
+// CHECK-NEXT:  ('sh_offset', 0x000002e8)
 // CHECK-NEXT:  ('sh_size', 0x00000030)
-// CHECK-NEXT:  ('sh_link', 0x00000005)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
 // CHECK-NEXT:  ('sh_info', 0x00000001)
 // CHECK-NEXT:  ('sh_addralign', 0x00000008)
 // CHECK-NEXT:  ('sh_entsize', 0x00000018)
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index dabe721..4df09e1 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -20,14 +20,7 @@ bar:
 
 
 // CHECK:  # Section 0x00000001
-// CHECK: (('sh_name', 0x00000001) # '.text'
-
-// CHECK:   # Symbol 0x00000002
-// CHECK: (('st_name', 0x00000000) # ''
-// CHECK:  ('st_bind', 0x00000000)
-// CHECK:  ('st_type', 0x00000003)
-// CHECK:  ('st_other', 0x00000000)
-// CHECK:  ('st_shndx', 0x00000001)
+// CHECK: (('sh_name', 0x00000006) # '.text'
 
 // CHECK: # Relocation 0x00000000
 // CHECK-NEXT:  (('r_offset', 0x00000001)
@@ -112,3 +105,10 @@ bar:
 // CHECK-NEXT:  ('r_sym', 0x00000006)
 // CHECK-NEXT:  ('r_type', 0x00000002)
 // CHECK-NEXT:  ('r_addend', 0x0000005c)
+
+// CHECK:   # Symbol 0x00000002
+// CHECK: (('st_name', 0x00000000) # ''
+// CHECK:  ('st_bind', 0x00000000)
+// CHECK:  ('st_type', 0x00000003)
+// CHECK:  ('st_other', 0x00000000)
+// CHECK:  ('st_shndx', 0x00000001)
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
index 3606560..e7cedd0 100644
--- a/test/MC/ELF/rename.s
+++ b/test/MC/ELF/rename.s
@@ -17,7 +17,7 @@ defined3:
 
 // Section 1 is .text
 // CHECK:      # Section 0x00000001
-// CHECK-NEXT: (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: (('sh_name', 0x00000006) # '.text'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000006)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -28,6 +28,13 @@ defined3:
 // CHECK-NEXT:  ('sh_addralign', 0x00000004)
 // CHECK-NEXT:  ('sh_entsize', 0x00000000)
 
+// The relocation uses symbol 2
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000000)
+// CHECK-NEXT:  ('r_sym', 0x00000002)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+
 // Symbol 2 is section 1
 // CHECK:      # Symbol 0x00000002
 // CHECK-NEXT: (('st_name', 0x00000000) # ''
@@ -37,10 +44,3 @@ defined3:
 // CHECK-NEXT:  ('st_shndx', 0x00000001)
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
-
-// The relocation uses symbol 2
-// CHECK:      # Relocation 0x00000000
-// CHECK-NEXT: (('r_offset', 0x00000000)
-// CHECK-NEXT:  ('r_sym', 0x00000002)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
diff --git a/test/MC/ELF/section-quoting.s b/test/MC/ELF/section-quoting.s
new file mode 100644
index 0000000..3751e72
--- /dev/null
+++ b/test/MC/ELF/section-quoting.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s
+
+// Test that we handle the strings like gas
+.section bar-"foo"
+.section "foo"
+.section "foo bar"
+
+// CHECK: .section "bar-\"foo\""
+// CHECK: .section foo
+// CHECK: .section "foo bar"
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
index 861dc4f..5fb5525 100644
--- a/test/MC/ELF/section.s
+++ b/test/MC/ELF/section.s
@@ -7,10 +7,10 @@
 .section	.note.GNU-,"",@progbits
 .section	-.note.GNU,"",@progbits
 
-// CHECK: ('sh_name', 0x00000012) # '.note.GNU-stack'
-// CHECK: ('sh_name', 0x00000022) # '.note.GNU-stack2'
-// CHECK: ('sh_name', 0x00000033) # '.note.GNU-'
-// CHECK: ('sh_name', 0x0000003e) # '-.note.GNU'
+// CHECK: ('sh_name', 0x00000038) # '.note.GNU-stack'
+// CHECK: ('sh_name', 0x0000008f) # '.note.GNU-stack2'
+// CHECK: ('sh_name', 0x000000a0) # '.note.GNU-'
+// CHECK: ('sh_name', 0x00000084) # '-.note.GNU'
 
 // Test that the defaults are used
 
@@ -19,7 +19,7 @@
 .section	.rodata
 .section	zed, ""
 
-// CHECK:      (('sh_name', 0x00000049) # '.init'
+// CHECK:      (('sh_name', 0x00000012) # '.init'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000006)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -31,7 +31,7 @@
 // CHECK-NEXT:  ('sh_entsize', 0x00000000)
 // CHECK-NEXT: ),
 // CHECK-NEXT: # Section 0x0000000b
-// CHECK-NEXT: (('sh_name', 0x0000004f) # '.fini'
+// CHECK-NEXT: (('sh_name', 0x00000048) # '.fini'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000006)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -43,7 +43,7 @@
 // CHECK-NEXT:  ('sh_entsize', 0x00000000)
 // CHECK-NEXT: ),
 // CHECK-NEXT: # Section 0x0000000c
-// CHECK-NEXT: (('sh_name', 0x00000055) # '.rodata'
+// CHECK-NEXT: (('sh_name', 0x00000076) # '.rodata'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000002)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -55,7 +55,7 @@
 // CHECK-NEXT:  ('sh_entsize', 0x00000000)
 // CHECK-NEXT: ),
 // CHECK-NEXT: # Section 0x0000000d
-// CHECK-NEXT: (('sh_name', 0x0000005d) # 'zed'
+// CHECK-NEXT: (('sh_name', 0x00000058) # 'zed'
 // CHECK-NEXT:  ('sh_type', 0x00000001)
 // CHECK-NEXT:  ('sh_flags', 0x00000000)
 // CHECK-NEXT:  ('sh_addr', 0x00000000)
@@ -68,7 +68,7 @@
 // CHECK-NEXT: ),
 
 .section	.note.test,"",@note
-// CHECK:       (('sh_name', 0x00000061) # '.note.test'
+// CHECK:       (('sh_name', 0x00000007) # '.note.test'
 // CHECK-NEXT:   ('sh_type', 0x00000007)
 // CHECK-NEXT:   ('sh_flags', 0x00000000)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -90,7 +90,7 @@ bar:
 
 .section .eh_frame,"a",@unwind
 
-// CHECK:       (('sh_name', 0x00000080) # '.eh_frame'
+// CHECK:       (('sh_name', 0x0000004e) # '.eh_frame'
 // CHECK-NEXT:   ('sh_type', 0x70000001)
 // CHECK-NEXT:   ('sh_flags', 0x00000002)
 // CHECK-NEXT:   ('sh_addr', 0x00000000)
@@ -106,5 +106,5 @@ bar:
 .section bar-"foo"
 .section "foo"
 
-// CHECK: ('sh_name', 0x0000008a) # 'bar-"foo"'
-// CHECK: ('sh_name', 0x00000094) # 'foo'
+// CHECK: ('sh_name', 0x000000ab) # 'bar-"foo"'
+// CHECK: ('sh_name', 0x00000034) # 'foo'
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
index b99e71b..d945c82 100644
--- a/test/MC/ELF/symref.s
+++ b/test/MC/ELF/symref.s
@@ -22,6 +22,38 @@ defined3:
 global1:
 
 
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000000)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000004)
+// CHECK-NEXT:  ('r_sym', 0x0000000b)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000002
+// CHECK-NEXT: (('r_offset', 0x00000008)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000003
+// CHECK-NEXT: (('r_offset', 0x0000000c)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000004
+// CHECK-NEXT: (('r_offset', 0x00000010)
+// CHECK-NEXT:  ('r_sym', 0x0000000c)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT:])
+
 // CHECK:      # Symbol 0x00000001
 // CHECK-NEXT: (('st_name', 0x00000013) # 'bar1@zed'
 // CHECK-NEXT:  ('st_bind', 0x00000000)
@@ -81,7 +113,7 @@ global1:
 // CHECK-NEXT:  ('st_bind', 0x00000000)
 // CHECK-NEXT:  ('st_type', 0x00000003)
 // CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000002)
+// CHECK-NEXT:  ('st_shndx', 0x00000003)
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
@@ -90,7 +122,7 @@ global1:
 // CHECK-NEXT:  ('st_bind', 0x00000000)
 // CHECK-NEXT:  ('st_type', 0x00000003)
 // CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000003)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
@@ -131,35 +163,3 @@ global1:
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
 // CHECK-NEXT:])
-
-// CHECK:      # Relocation 0x00000000
-// CHECK-NEXT: (('r_offset', 0x00000000)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 0x00000001
-// CHECK-NEXT: (('r_offset', 0x00000004)
-// CHECK-NEXT:  ('r_sym', 0x0000000b)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 0x00000002
-// CHECK-NEXT: (('r_offset', 0x00000008)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 0x00000003
-// CHECK-NEXT: (('r_offset', 0x0000000c)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 0x00000004
-// CHECK-NEXT: (('r_offset', 0x00000010)
-// CHECK-NEXT:  ('r_sym', 0x0000000c)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x00000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 459d4cc..c754121 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -8,6 +8,7 @@
         movl    foo4@TLSLDM(%eax), %eax
         movl    foo5@TPOFF(%eax), %eax
         movl    foo6@DTPOFF(%eax), %eax
+        movl    foo7@INDNTPOFF, %eax
 
 // CHECK:       (('st_name', 0x00000001) # 'foo1'
 // CHECK-NEXT:   ('st_value', 0x00000000)
@@ -62,3 +63,12 @@
 // CHECK-NEXT:   ('st_other', 0x00000000)
 // CHECK-NEXT:   ('st_shndx', 0x00000000)
 // CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000b
+// CHECK-NEXT:  (('st_name', 0x0000001f) # 'foo7'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index 2517a5b..f9d6425 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -14,7 +14,7 @@ foobar:
 // CHECK-NEXT:  ('st_bind', 0x00000000)
 // CHECK-NEXT:  ('st_type', 0x00000006)
 // CHECK-NEXT:  ('st_other', 0x00000000)
-// CHECK-NEXT:  ('st_shndx', 0x00000004)
+// CHECK-NEXT:  ('st_shndx', 0x00000005)
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
diff --git a/test/MC/ELF/undef2.s b/test/MC/ELF/undef2.s
index 9544fbc..6ce269b 100644
--- a/test/MC/ELF/undef2.s
+++ b/test/MC/ELF/undef2.s
@@ -7,4 +7,4 @@
 // CHECK: ('_symbols', [
 // CHECK:      (('st_name', 0x00000001) # '.Lfoo'
 // CHECK-NEXT:  ('st_bind', 0x00000001)
-// CHECK: (('sh_name', 0x00000024) # '.strtab'
+// CHECK: (('sh_name', 0x0000001b) # '.strtab'
diff --git a/test/MC/ELF/weak-relocation.s b/test/MC/ELF/weak-relocation.s
new file mode 100644
index 0000000..ef331d7
--- /dev/null
+++ b/test/MC/ELF/weak-relocation.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that weak symbols always produce relocations
+
+	.weak	foo
+foo:
+bar:
+        call    foo
+
+//CHECK:        # Relocation 0x00000000
+//CHECK-NEXT:   (('r_offset', 0x00000001)
+//CHECK-NEXT:    ('r_sym', 0x00000005)
+//CHECK-NEXT:    ('r_type', 0x00000002)
+//CHECK-NEXT:    ('r_addend', 0xfffffffc)
+//CHECK-NEXT:   ),
diff --git a/test/MC/ELF/weakref-reloc.s b/test/MC/ELF/weakref-reloc.s
index c7cd764..63c1f5e 100644
--- a/test/MC/ELF/weakref-reloc.s
+++ b/test/MC/ELF/weakref-reloc.s
@@ -7,6 +7,19 @@
         call    zed@PLT
 	call	bar
 
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000001)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000004)
+// CHECK-NEXT:  ('r_addend', 0xfffffffc)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000006)
+// CHECK-NEXT:  ('r_sym', 0x00000005)
+// CHECK-NEXT:  ('r_type', 0x00000002)
+// CHECK-NEXT:  ('r_addend', 0xfffffffc)
+// CHECK-NEXT: ),
+
 // CHECK:      # Symbol 0x00000004
 // CHECK-NEXT: (('st_name', 0x00000009) # '_GLOBAL_OFFSET_TABLE_'
 // CHECK-NEXT:  ('st_bind', 0x00000001)
@@ -34,16 +47,3 @@
 // CHECK-NEXT:  ('st_value', 0x0000000000000000)
 // CHECK-NEXT:  ('st_size', 0x0000000000000000)
 // CHECK-NEXT: ),
-
-// CHECK:      # Relocation 0x00000000
-// CHECK-NEXT: (('r_offset', 0x00000001)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000004)
-// CHECK-NEXT:  ('r_addend', 0xfffffffc)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 0x00000001
-// CHECK-NEXT: (('r_offset', 0x00000006)
-// CHECK-NEXT:  ('r_sym', 0x00000005)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0xfffffffc)
-// CHECK-NEXT: ),
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index aea10d1..9100073 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -128,7 +128,7 @@ bar15:
 // CHECK-NEXT:   ('st_bind', 0x00000000)
 // CHECK-NEXT:   ('st_type', 0x00000003)
 // CHECK-NEXT:   ('st_other', 0x00000000)
-// CHECK-NEXT:   ('st_shndx', 0x00000002)
+// CHECK-NEXT:   ('st_shndx', 0x00000003)
 // CHECK-NEXT:   ('st_value', 0x0000000000000000)
 // CHECK-NEXT:   ('st_size', 0x0000000000000000)
 // CHECK-NEXT:  ),
@@ -137,7 +137,7 @@ bar15:
 // CHECK-NEXT:   ('st_bind', 0x00000000)
 // CHECK-NEXT:   ('st_type', 0x00000003)
 // CHECK-NEXT:   ('st_other', 0x00000000)
-// CHECK-NEXT:   ('st_shndx', 0x00000003)
+// CHECK-NEXT:   ('st_shndx', 0x00000004)
 // CHECK-NEXT:   ('st_value', 0x0000000000000000)
 // CHECK-NEXT:   ('st_size', 0x0000000000000000)
 // CHECK-NEXT:  ),
diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s
index 449d2f5..f5d93ae 100644
--- a/test/MC/MachO/darwin-x86_64-diff-relocs.s
+++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s
@@ -157,7 +157,7 @@ L3:
 // FIXME: Unfortunately, we do not get these relocations in exactly the same
 // order as Darwin 'as'. It turns out that 'as' *usually* ends up emitting
 // them in reverse address order, but sometimes it allocates some
-// additional relocations late so these end up preceed the other entries. I
+// additional relocations late so these end up precede the other entries. I
 // haven't figured out the exact criteria for this yet.
         
 // CHECK:     (('word-0', 0x56),
diff --git a/test/MC/MachO/section-attributes.s b/test/MC/MachO/section-attributes.s
new file mode 100644
index 0000000..b21ef38
--- /dev/null
+++ b/test/MC/MachO/section-attributes.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o %t
+// RUN: macho-dump %t | FileCheck %s
+
+// CHECK: # Section 1
+// CHECK: ('flags', 0x0)
+.section __TEXT,__objc_opt_ro
+.long 0
diff --git a/test/MC/MachO/temp-labels.s b/test/MC/MachO/temp-labels.s
new file mode 100644
index 0000000..b7382b7
--- /dev/null
+++ b/test/MC/MachO/temp-labels.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -L -o - | macho-dump --dump-section-data | FileCheck %s
+
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 296)
+// CHECK:   ('nsyms', 2)
+// CHECK:   ('stroff', 328)
+// CHECK:   ('strsize', 8)
+// CHECK:   ('_string_data', '\x00_f0\x00L0\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_f0')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 5)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 4)
+// CHECK:     ('_string', 'L0')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+_f0:
+        .long 0
+L0:
+        .long 0
diff --git a/test/MC/MachO/variable-errors.s b/test/MC/MachO/variable-errors.s
new file mode 100644
index 0000000..28308c69
--- /dev/null
+++ b/test/MC/MachO/variable-errors.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o %t.o 2> %t.err
+// RUN: FileCheck < %t.err %s
+
+        .data
+t0_a:
+t0_x = t0_a - t0_b
+// CHECK: unable to evaluate offset to undefined symbol 't0_b'
+	.long	t0_x
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
new file mode 100644
index 0000000..8eeb82f
--- /dev/null
+++ b/test/MC/MachO/variable-exprs.s
@@ -0,0 +1,446 @@
+// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o %t.o
+// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: FileCheck --check-prefix=CHECK-I386 < %t.dump %s
+
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o %t.o
+// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: FileCheck --check-prefix=CHECK-X86_64 < %t.dump %s
+
+.data
+
+        .long 0
+a:
+        .long 0
+b = a
+
+c:      .long b
+
+d2 = d
+.globl d2
+d3 = d + 4
+.globl d3
+
+e = a + 4
+
+g:
+f = g
+        .long 0
+        
+        .long b
+        .long e
+        .long a + 4
+        .long d
+        .long d2
+        .long d3
+        .long f
+        .long g
+
+///
+        .text
+t0:
+Lt0_a:
+        ret
+
+	.data
+Lt0_b:
+Lt0_x = Lt0_a - Lt0_b
+	.quad	Lt0_x
+
+// CHECK-I386: ('cputype', 7)
+// CHECK-I386: ('cpusubtype', 3)
+// CHECK-I386: ('filetype', 1)
+// CHECK-I386: ('num_load_commands', 3)
+// CHECK-I386: ('load_commands_size', 296)
+// CHECK-I386: ('flag', 0)
+// CHECK-I386: ('load_commands', [
+// CHECK-I386:   # Load Command 0
+// CHECK-I386:  (('command', 1)
+// CHECK-I386:   ('size', 192)
+// CHECK-I386:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-I386:   ('vm_addr', 0)
+// CHECK-I386:   ('vm_size', 57)
+// CHECK-I386:   ('file_offset', 324)
+// CHECK-I386:   ('file_size', 57)
+// CHECK-I386:   ('maxprot', 7)
+// CHECK-I386:   ('initprot', 7)
+// CHECK-I386:   ('num_sections', 2)
+// CHECK-I386:   ('flags', 0)
+// CHECK-I386:   ('sections', [
+// CHECK-I386:     # Section 0
+// CHECK-I386:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-I386:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-I386:     ('address', 0)
+// CHECK-I386:     ('size', 1)
+// CHECK-I386:     ('offset', 324)
+// CHECK-I386:     ('alignment', 0)
+// CHECK-I386:     ('reloc_offset', 0)
+// CHECK-I386:     ('num_reloc', 0)
+// CHECK-I386:     ('flags', 0x80000400)
+// CHECK-I386:     ('reserved1', 0)
+// CHECK-I386:     ('reserved2', 0)
+// CHECK-I386:    ),
+// CHECK-I386:   ('_relocations', [
+// CHECK-I386:   ])
+// CHECK-I386:   ('_section_data', 'c3')
+// CHECK-I386:     # Section 1
+// CHECK-I386:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-I386:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-I386:     ('address', 1)
+// CHECK-I386:     ('size', 56)
+// CHECK-I386:     ('offset', 325)
+// CHECK-I386:     ('alignment', 0)
+// CHECK-I386:     ('reloc_offset', 384)
+// CHECK-I386:     ('num_reloc', 9)
+// CHECK-I386:     ('flags', 0x0)
+// CHECK-I386:     ('reserved1', 0)
+// CHECK-I386:     ('reserved2', 0)
+// CHECK-I386:    ),
+// CHECK-I386:   ('_relocations', [
+// CHECK-I386:     # Relocation 0
+// CHECK-I386:     (('word-0', 0x2c),
+// CHECK-I386:      ('word-1', 0x4000002)),
+// CHECK-I386:     # Relocation 1
+// CHECK-I386:     (('word-0', 0x28),
+// CHECK-I386:      ('word-1', 0x4000002)),
+// CHECK-I386:     # Relocation 2
+// CHECK-I386:     (('word-0', 0x24),
+// CHECK-I386:      ('word-1', 0xc000009)),
+// CHECK-I386:     # Relocation 3
+// CHECK-I386:     (('word-0', 0x20),
+// CHECK-I386:      ('word-1', 0xc000008)),
+// CHECK-I386:     # Relocation 4
+// CHECK-I386:     (('word-0', 0x1c),
+// CHECK-I386:      ('word-1', 0xc000007)),
+// CHECK-I386:     # Relocation 5
+// CHECK-I386:     (('word-0', 0xa0000018),
+// CHECK-I386:      ('word-1', 0x5)),
+// CHECK-I386:     # Relocation 6
+// CHECK-I386:     (('word-0', 0x14),
+// CHECK-I386:      ('word-1', 0x4000002)),
+// CHECK-I386:     # Relocation 7
+// CHECK-I386:     (('word-0', 0x10),
+// CHECK-I386:      ('word-1', 0x4000002)),
+// CHECK-I386:     # Relocation 8
+// CHECK-I386:     (('word-0', 0x8),
+// CHECK-I386:      ('word-1', 0x4000002)),
+// CHECK-I386:   ])
+// CHECK-I386:   ('_section_data', '00000000 00000000 05000000 00000000 05000000 09000000 09000000 00000000 00000000 00000000 0d000000 0d000000 cfffffff ffffffff')
+// CHECK-I386:   ])
+// CHECK-I386:  ),
+// CHECK-I386:   # Load Command 1
+// CHECK-I386:  (('command', 2)
+// CHECK-I386:   ('size', 24)
+// CHECK-I386:   ('symoff', 456)
+// CHECK-I386:   ('nsyms', 10)
+// CHECK-I386:   ('stroff', 576)
+// CHECK-I386:   ('strsize', 24)
+// CHECK-I386:   ('_string_data', '\x00d2\x00d\x00d3\x00a\x00b\x00c\x00e\x00g\x00f\x00t0\x00')
+// CHECK-I386:   ('_symbols', [
+// CHECK-I386:     # Symbol 0
+// CHECK-I386:    (('n_strx', 9)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 5)
+// CHECK-I386:     ('_string', 'a')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 1
+// CHECK-I386:    (('n_strx', 11)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 5)
+// CHECK-I386:     ('_string', 'b')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 2
+// CHECK-I386:    (('n_strx', 13)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 9)
+// CHECK-I386:     ('_string', 'c')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 3
+// CHECK-I386:    (('n_strx', 15)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 9)
+// CHECK-I386:     ('_string', 'e')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 4
+// CHECK-I386:    (('n_strx', 17)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 13)
+// CHECK-I386:     ('_string', 'g')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 5
+// CHECK-I386:    (('n_strx', 19)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 2)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 13)
+// CHECK-I386:     ('_string', 'f')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 6
+// CHECK-I386:    (('n_strx', 21)
+// CHECK-I386:     ('n_type', 0xe)
+// CHECK-I386:     ('n_sect', 1)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 0)
+// CHECK-I386:     ('_string', 't0')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 7
+// CHECK-I386:    (('n_strx', 4)
+// CHECK-I386:     ('n_type', 0x1)
+// CHECK-I386:     ('n_sect', 0)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 0)
+// CHECK-I386:     ('_string', 'd')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 8
+// CHECK-I386:    (('n_strx', 1)
+// CHECK-I386:     ('n_type', 0x1)
+// CHECK-I386:     ('n_sect', 0)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 0)
+// CHECK-I386:     ('_string', 'd2')
+// CHECK-I386:    ),
+// CHECK-I386:     # Symbol 9
+// CHECK-I386:    (('n_strx', 6)
+// CHECK-I386:     ('n_type', 0x1)
+// CHECK-I386:     ('n_sect', 0)
+// CHECK-I386:     ('n_desc', 0)
+// CHECK-I386:     ('n_value', 0)
+// CHECK-I386:     ('_string', 'd3')
+// CHECK-I386:    ),
+// CHECK-I386:   ])
+// CHECK-I386:  ),
+// CHECK-I386:   # Load Command 2
+// CHECK-I386:  (('command', 11)
+// CHECK-I386:   ('size', 80)
+// CHECK-I386:   ('ilocalsym', 0)
+// CHECK-I386:   ('nlocalsym', 7)
+// CHECK-I386:   ('iextdefsym', 7)
+// CHECK-I386:   ('nextdefsym', 0)
+// CHECK-I386:   ('iundefsym', 7)
+// CHECK-I386:   ('nundefsym', 3)
+// CHECK-I386:   ('tocoff', 0)
+// CHECK-I386:   ('ntoc', 0)
+// CHECK-I386:   ('modtaboff', 0)
+// CHECK-I386:   ('nmodtab', 0)
+// CHECK-I386:   ('extrefsymoff', 0)
+// CHECK-I386:   ('nextrefsyms', 0)
+// CHECK-I386:   ('indirectsymoff', 0)
+// CHECK-I386:   ('nindirectsyms', 0)
+// CHECK-I386:   ('extreloff', 0)
+// CHECK-I386:   ('nextrel', 0)
+// CHECK-I386:   ('locreloff', 0)
+// CHECK-I386:   ('nlocrel', 0)
+// CHECK-I386:   ('_indirect_symbols', [
+// CHECK-I386:   ])
+// CHECK-I386:  ),
+// CHECK-I386: ])
+
+// CHECK-X86_64: ('cputype', 16777223)
+// CHECK-X86_64: ('cpusubtype', 3)
+// CHECK-X86_64: ('filetype', 1)
+// CHECK-X86_64: ('num_load_commands', 3)
+// CHECK-X86_64: ('load_commands_size', 336)
+// CHECK-X86_64: ('flag', 0)
+// CHECK-X86_64: ('reserved', 0)
+// CHECK-X86_64: ('load_commands', [
+// CHECK-X86_64:   # Load Command 0
+// CHECK-X86_64:  (('command', 25)
+// CHECK-X86_64:   ('size', 232)
+// CHECK-X86_64:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-X86_64:   ('vm_addr', 0)
+// CHECK-X86_64:   ('vm_size', 57)
+// CHECK-X86_64:   ('file_offset', 368)
+// CHECK-X86_64:   ('file_size', 57)
+// CHECK-X86_64:   ('maxprot', 7)
+// CHECK-X86_64:   ('initprot', 7)
+// CHECK-X86_64:   ('num_sections', 2)
+// CHECK-X86_64:   ('flags', 0)
+// CHECK-X86_64:   ('sections', [
+// CHECK-X86_64:     # Section 0
+// CHECK-X86_64:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-X86_64:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-X86_64:     ('address', 0)
+// CHECK-X86_64:     ('size', 1)
+// CHECK-X86_64:     ('offset', 368)
+// CHECK-X86_64:     ('alignment', 0)
+// CHECK-X86_64:     ('reloc_offset', 0)
+// CHECK-X86_64:     ('num_reloc', 0)
+// CHECK-X86_64:     ('flags', 0x80000400)
+// CHECK-X86_64:     ('reserved1', 0)
+// CHECK-X86_64:     ('reserved2', 0)
+// CHECK-X86_64:     ('reserved3', 0)
+// CHECK-X86_64:    ),
+// CHECK-X86_64:   ('_relocations', [
+// CHECK-X86_64:   ])
+// CHECK-X86_64:   ('_section_data', 'c3')
+// CHECK-X86_64:     # Section 1
+// CHECK-X86_64:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-X86_64:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-X86_64:     ('address', 1)
+// CHECK-X86_64:     ('size', 56)
+// CHECK-X86_64:     ('offset', 369)
+// CHECK-X86_64:     ('alignment', 0)
+// CHECK-X86_64:     ('reloc_offset', 428)
+// CHECK-X86_64:     ('num_reloc', 9)
+// CHECK-X86_64:     ('flags', 0x0)
+// CHECK-X86_64:     ('reserved1', 0)
+// CHECK-X86_64:     ('reserved2', 0)
+// CHECK-X86_64:     ('reserved3', 0)
+// CHECK-X86_64:    ),
+// CHECK-X86_64:   ('_relocations', [
+// CHECK-X86_64:     # Relocation 0
+// CHECK-X86_64:     (('word-0', 0x2c),
+// CHECK-X86_64:      ('word-1', 0xc000004)),
+// CHECK-X86_64:     # Relocation 1
+// CHECK-X86_64:     (('word-0', 0x28),
+// CHECK-X86_64:      ('word-1', 0xc000005)),
+// CHECK-X86_64:     # Relocation 2
+// CHECK-X86_64:     (('word-0', 0x24),
+// CHECK-X86_64:      ('word-1', 0xc000009)),
+// CHECK-X86_64:     # Relocation 3
+// CHECK-X86_64:     (('word-0', 0x20),
+// CHECK-X86_64:      ('word-1', 0xc000008)),
+// CHECK-X86_64:     # Relocation 4
+// CHECK-X86_64:     (('word-0', 0x1c),
+// CHECK-X86_64:      ('word-1', 0xc000007)),
+// CHECK-X86_64:     # Relocation 5
+// CHECK-X86_64:     (('word-0', 0x18),
+// CHECK-X86_64:      ('word-1', 0xc000000)),
+// CHECK-X86_64:     # Relocation 6
+// CHECK-X86_64:     (('word-0', 0x14),
+// CHECK-X86_64:      ('word-1', 0xc000003)),
+// CHECK-X86_64:     # Relocation 7
+// CHECK-X86_64:     (('word-0', 0x10),
+// CHECK-X86_64:      ('word-1', 0xc000001)),
+// CHECK-X86_64:     # Relocation 8
+// CHECK-X86_64:     (('word-0', 0x8),
+// CHECK-X86_64:      ('word-1', 0xc000001)),
+// CHECK-X86_64:   ])
+// CHECK-X86_64:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 04000000 00000000 00000000 00000000 00000000 00000000 cfffffff ffffffff')
+// CHECK-X86_64:   ])
+// CHECK-X86_64:  ),
+// CHECK-X86_64:   # Load Command 1
+// CHECK-X86_64:  (('command', 2)
+// CHECK-X86_64:   ('size', 24)
+// CHECK-X86_64:   ('symoff', 500)
+// CHECK-X86_64:   ('nsyms', 10)
+// CHECK-X86_64:   ('stroff', 660)
+// CHECK-X86_64:   ('strsize', 24)
+// CHECK-X86_64:   ('_string_data', '\x00d2\x00d\x00d3\x00a\x00b\x00c\x00e\x00g\x00f\x00t0\x00')
+// CHECK-X86_64:   ('_symbols', [
+// CHECK-X86_64:     # Symbol 0
+// CHECK-X86_64:    (('n_strx', 9)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 5)
+// CHECK-X86_64:     ('_string', 'a')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 1
+// CHECK-X86_64:    (('n_strx', 11)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 5)
+// CHECK-X86_64:     ('_string', 'b')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 2
+// CHECK-X86_64:    (('n_strx', 13)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 9)
+// CHECK-X86_64:     ('_string', 'c')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 3
+// CHECK-X86_64:    (('n_strx', 15)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 9)
+// CHECK-X86_64:     ('_string', 'e')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 4
+// CHECK-X86_64:    (('n_strx', 17)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 13)
+// CHECK-X86_64:     ('_string', 'g')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 5
+// CHECK-X86_64:    (('n_strx', 19)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 2)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 13)
+// CHECK-X86_64:     ('_string', 'f')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 6
+// CHECK-X86_64:    (('n_strx', 21)
+// CHECK-X86_64:     ('n_type', 0xe)
+// CHECK-X86_64:     ('n_sect', 1)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 0)
+// CHECK-X86_64:     ('_string', 't0')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 7
+// CHECK-X86_64:    (('n_strx', 4)
+// CHECK-X86_64:     ('n_type', 0x1)
+// CHECK-X86_64:     ('n_sect', 0)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 0)
+// CHECK-X86_64:     ('_string', 'd')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 8
+// CHECK-X86_64:    (('n_strx', 1)
+// CHECK-X86_64:     ('n_type', 0x1)
+// CHECK-X86_64:     ('n_sect', 0)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 0)
+// CHECK-X86_64:     ('_string', 'd2')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:     # Symbol 9
+// CHECK-X86_64:    (('n_strx', 6)
+// CHECK-X86_64:     ('n_type', 0x1)
+// CHECK-X86_64:     ('n_sect', 0)
+// CHECK-X86_64:     ('n_desc', 0)
+// CHECK-X86_64:     ('n_value', 0)
+// CHECK-X86_64:     ('_string', 'd3')
+// CHECK-X86_64:    ),
+// CHECK-X86_64:   ])
+// CHECK-X86_64:  ),
+// CHECK-X86_64:   # Load Command 2
+// CHECK-X86_64:  (('command', 11)
+// CHECK-X86_64:   ('size', 80)
+// CHECK-X86_64:   ('ilocalsym', 0)
+// CHECK-X86_64:   ('nlocalsym', 7)
+// CHECK-X86_64:   ('iextdefsym', 7)
+// CHECK-X86_64:   ('nextdefsym', 0)
+// CHECK-X86_64:   ('iundefsym', 7)
+// CHECK-X86_64:   ('nundefsym', 3)
+// CHECK-X86_64:   ('tocoff', 0)
+// CHECK-X86_64:   ('ntoc', 0)
+// CHECK-X86_64:   ('modtaboff', 0)
+// CHECK-X86_64:   ('nmodtab', 0)
+// CHECK-X86_64:   ('extrefsymoff', 0)
+// CHECK-X86_64:   ('nextrefsyms', 0)
+// CHECK-X86_64:   ('indirectsymoff', 0)
+// CHECK-X86_64:   ('nindirectsyms', 0)
+// CHECK-X86_64:   ('extreloff', 0)
+// CHECK-X86_64:   ('nextrel', 0)
+// CHECK-X86_64:   ('locreloff', 0)
+// CHECK-X86_64:   ('nlocrel', 0)
+// CHECK-X86_64:   ('_indirect_symbols', [
+// CHECK-X86_64:   ])
+// CHECK-X86_64:  ),
+// CHECK-X86_64: ])
diff --git a/test/MC/X86/padlock.s b/test/MC/X86/padlock.s
new file mode 100644
index 0000000..874786f
--- /dev/null
+++ b/test/MC/X86/padlock.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+	xstore
+// CHECK: xstore
+// CHECK: encoding: [0x0f,0xa7,0xc0]
+
+	rep xcryptecb
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xcryptecb
+// CHECK: encoding: [0x0f,0xa7,0xc8]
+
+	rep xcryptcbc
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xcryptcbc
+// CHECK: encoding: [0x0f,0xa7,0xd0]
+
+	rep xcryptctr
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xcryptctr
+// CHECK: encoding: [0x0f,0xa7,0xd8]
+
+	rep xcryptcfb
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xcryptcfb
+// CHECK: encoding: [0x0f,0xa7,0xe0]
+
+	rep xcryptofb
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xcryptofb
+// CHECK: encoding: [0x0f,0xa7,0xe8]
+
+	rep xsha1
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xsha1
+// CHECK: encoding: [0x0f,0xa6,0xc8]
+
+	rep xsha256
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: xsha256
+// CHECK: encoding: [0x0f,0xa6,0xd0]
+
+	rep montmul
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: montmul
+// CHECK: encoding: [0x0f,0xa6,0xc0]
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 4ec9fcd..4ac7efd 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -372,6 +372,14 @@
 // CHECK: 	nop
         	nop
 
+// CHECK: flds	(%edi)
+// CHECK:  encoding: [0xd9,0x07]
+        	flds	(%edi)
+
+// CHECK: filds	(%edi)
+// CHECK:  encoding: [0xdf,0x07]
+        	filds	(%edi)
+
 // CHECK: 	fldl	3735928559(%ebx,%ecx,8)
         	fldl	0xdeadbeef(%ebx,%ecx,8)
 
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 723983d..28900bb 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -613,11 +613,11 @@ pshufw $90, %mm4, %mm0
 // CHECK:  encoding: [0xd5,0x01]
         	aad	$1
 
-// CHECK: aad	$10
+// CHECK: aad
 // CHECK:  encoding: [0xd5,0x0a]
         	aad	$0xA
 
-// CHECK: aad	$10
+// CHECK: aad
 // CHECK:  encoding: [0xd5,0x0a]
         	aad
 
@@ -625,11 +625,11 @@ pshufw $90, %mm4, %mm0
 // CHECK:  encoding: [0xd4,0x02]
         	aam	$2
 
-// CHECK: aam	$10
+// CHECK: aam
 // CHECK:  encoding: [0xd4,0x0a]
         	aam	$0xA
 
-// CHECK: aam	$10
+// CHECK: aam
 // CHECK:  encoding: [0xd4,0x0a]
         	aam
 
@@ -725,7 +725,7 @@ pshufw $90, %mm4, %mm0
 // CHECK:  encoding: [0xdf,0xf2]
         	fcompi	%st(2)
 
-// CHECK: fcompi	%st(1)
+// CHECK: fcompi
 // CHECK:  encoding: [0xdf,0xf1]
         	fcompi
 
@@ -737,7 +737,7 @@ pshufw $90, %mm4, %mm0
 // CHECK:  encoding: [0xdf,0xea]
         	fucompi	%st(2)
 
-// CHECK: fucompi	%st(1)
+// CHECK: fucompi
 // CHECK:  encoding: [0xdf,0xe9]
         	fucompi
 
@@ -816,3 +816,121 @@ pshufw $90, %mm4, %mm0
 // CHECK: loopne 0
 // CHECK: encoding: [0xe0,A]
 	loopnz 0
+
+// CHECK: outsb # encoding: [0x6e]
+// CHECK: outsb
+// CHECK: outsb
+	outsb
+	outsb	%ds:(%esi), %dx
+	outsb	(%esi), %dx
+
+// CHECK: outsw # encoding: [0x66,0x6f]
+// CHECK: outsw
+// CHECK: outsw
+	outsw
+	outsw	%ds:(%esi), %dx
+	outsw	(%esi), %dx
+
+// CHECK: outsl # encoding: [0x6f]
+// CHECK: outsl
+	outsl
+	outsl	%ds:(%esi), %dx
+	outsl	(%esi), %dx
+
+// CHECK: insb # encoding: [0x6c]
+// CHECK: insb
+	insb
+	insb	%dx, %es:(%edi)
+
+// CHECK: insw # encoding: [0x66,0x6d]
+// CHECK: insw
+	insw
+	insw	%dx, %es:(%edi)
+
+// CHECK: insl # encoding: [0x6d]
+// CHECK: insl
+	insl
+	insl	%dx, %es:(%edi)
+
+// CHECK: movsb # encoding: [0xa4]
+// CHECK: movsb
+// CHECK: movsb
+	movsb
+	movsb	%ds:(%esi), %es:(%edi)
+	movsb	(%esi), %es:(%edi)
+
+// CHECK: movsw # encoding: [0x66,0xa5]
+// CHECK: movsw
+// CHECK: movsw
+	movsw
+	movsw	%ds:(%esi), %es:(%edi)
+	movsw	(%esi), %es:(%edi)
+
+// CHECK: movsd # encoding: [0xa5]
+// CHECK: movsd
+// CHECK: movsd
+	movsl
+	movsl	%ds:(%esi), %es:(%edi)
+	movsl	(%esi), %es:(%edi)
+
+// CHECK: lodsb # encoding: [0xac]
+// CHECK: lodsb
+// CHECK: lodsb
+// CHECK: lodsb
+// CHECK: lodsb
+	lodsb
+	lodsb	%ds:(%esi), %al
+	lodsb	(%esi), %al
+	lods	%ds:(%esi), %al
+	lods	(%esi), %al
+
+// CHECK: lodsw # encoding: [0x66,0xad]
+// CHECK: lodsw
+// CHECK: lodsw
+// CHECK: lodsw
+// CHECK: lodsw
+	lodsw
+	lodsw	%ds:(%esi), %ax
+	lodsw	(%esi), %ax
+	lods	%ds:(%esi), %ax
+	lods	(%esi), %ax
+
+// CHECK: lodsl # encoding: [0xad]
+// CHECK: lodsl
+// CHECK: lodsl
+// CHECK: lodsl
+// CHECK: lodsl
+	lodsl
+	lodsl	%ds:(%esi), %eax
+	lodsl	(%esi), %eax
+	lods	%ds:(%esi), %eax
+	lods	(%esi), %eax
+
+// CHECK: stosb # encoding: [0xaa]
+// CHECK: stosb
+// CHECK: stosb
+	stosb
+	stosb	%al, %es:(%edi)
+	stos	%al, %es:(%edi)
+
+// CHECK: stosw # encoding: [0x66,0xab]
+// CHECK: stosw
+// CHECK: stosw
+	stosw
+	stosw	%ax, %es:(%edi)
+	stos	%ax, %es:(%edi)
+
+// CHECK: stosl # encoding: [0xab]
+// CHECK: stosl
+// CHECK: stosl
+	stosl
+	stosl	%eax, %es:(%edi)
+	stos	%eax, %es:(%edi)
+
+// CHECK: strw
+// CHECK: encoding: [0x66,0x0f,0x00,0xc8]
+	str %ax
+
+// CHECK: strl
+// CHECK: encoding: [0x0f,0x00,0xc8]
+	str %eax
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index ee9757f..fe08559 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -112,12 +112,12 @@
 // rdar://8470918
 smovb // CHECK: movsb
 smovw // CHECK: movsw
-smovl // CHECK: movsl
+smovl // CHECK: movsd
 smovq // CHECK: movsq
 
 // rdar://8456361
 // CHECK: rep
-// CHECK: movsl
+// CHECK: movsd
         rep movsd
 
 // CHECK: rep
@@ -190,6 +190,10 @@ fadd %st(7)
 // CHECK: int3
 INT3
 
+// rdar://8735979 - int $3 -> int3
+// CHECK: int3
+int	$3
+
 
 // Allow scale factor without index register.
 // CHECK: movaps	%xmm3, (%esi)
@@ -228,10 +232,10 @@ cmovnzq %rbx, %rax
 
 // rdar://8407928
 // CHECK: inb	$127, %al
-// CHECK: inw	%dx, %ax
+// CHECK: inw	%dx
 // CHECK: outb	%al, $127
-// CHECK: outw	%ax, %dx
-// CHECK: inl	%dx, %eax
+// CHECK: outw	%dx
+// CHECK: inl	%dx
 inb	$0x7f
 inw	%dx
 outb	$0x7f
@@ -240,12 +244,12 @@ inl	%dx
 
 
 // PR8114
-// CHECK: outb	%al, %dx
-// CHECK: outb	%al, %dx
-// CHECK: outw	%ax, %dx
-// CHECK: outw	%ax, %dx
-// CHECK: outl	%eax, %dx
-// CHECK: outl	%eax, %dx
+// CHECK: outb	%dx
+// CHECK: outb	%dx
+// CHECK: outw	%dx
+// CHECK: outw	%dx
+// CHECK: outl	%dx
+// CHECK: outl	%dx
 
 out	%al, (%dx)
 outb	%al, (%dx)
@@ -254,12 +258,12 @@ outw	%ax, (%dx)
 out	%eax, (%dx)
 outl	%eax, (%dx)
 
-// CHECK: inb	%dx, %al
-// CHECK: inb	%dx, %al
-// CHECK: inw	%dx, %ax
-// CHECK: inw	%dx, %ax
-// CHECK: inl	%dx, %eax
-// CHECK: inl	%dx, %eax
+// CHECK: inb	%dx
+// CHECK: inb	%dx
+// CHECK: inw	%dx
+// CHECK: inw	%dx
+// CHECK: inl	%dx
+// CHECK: inl	%dx
 
 in	(%dx), %al
 inb	(%dx), %al
@@ -270,16 +274,16 @@ inl	(%dx), %eax
 
 // rdar://8431422
 
-// CHECK: fxch	%st(1)
-// CHECK: fucom	%st(1)
-// CHECK: fucomp	%st(1)
-// CHECK: faddp	%st(1)
+// CHECK: fxch
+// CHECK: fucom
+// CHECK: fucomp
+// CHECK: faddp
 // CHECK: faddp	%st(0)
-// CHECK: fsubp	%st(1)
-// CHECK: fsubrp	%st(1)
-// CHECK: fmulp	%st(1)
-// CHECK: fdivp	%st(1)
-// CHECK: fdivrp	%st(1)
+// CHECK: fsubp
+// CHECK: fsubrp
+// CHECK: fmulp
+// CHECK: fdivp
+// CHECK: fdivrp
 
 fxch
 fucom
@@ -292,11 +296,11 @@ fmulp
 fdivp
 fdivrp
 
-// CHECK: fcomi	%st(1)
+// CHECK: fcomi
 // CHECK: fcomi	%st(2)
-// CHECK: fucomi	%st(1)
-// CHECK: fucomi	%st(2)
-// CHECK: fucomi	%st(2)
+// CHECK: fucomi
+// CHECK: fucomi %st(2)
+// CHECK: fucomi %st(2)
 
 fcomi
 fcomi	%st(2)
@@ -600,7 +604,7 @@ movsq
 // CHECK:   encoding: [0x48,0xa5]
 
 movsl
-// CHECK: movsl
+// CHECK: movsd
 // CHECK:   encoding: [0xa5]
 
 stosq
@@ -786,7 +790,7 @@ lock/incl 1(%rsp)
 rep movsl
 // CHECK: rep
 // CHECK: encoding: [0xf3]
-// CHECK: movsl
+// CHECK: movsd
 // CHECK: encoding: [0xa5]
 
 
@@ -973,3 +977,154 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 // CHECK: loopne 0
 // CHECK: encoding: [0xe0,A]
 	loopnz 0
+
+// CHECK: outsb # encoding: [0x6e]
+// CHECK: outsb
+// CHECK: outsb
+	outsb
+	outsb	%ds:(%rsi), %dx
+	outsb	(%rsi), %dx
+
+// CHECK: outsw # encoding: [0x66,0x6f]
+// CHECK: outsw
+// CHECK: outsw
+	outsw
+	outsw	%ds:(%rsi), %dx
+	outsw	(%rsi), %dx
+
+// CHECK: outsl # encoding: [0x6f]
+// CHECK: outsl
+	outsl
+	outsl	%ds:(%rsi), %dx
+	outsl	(%rsi), %dx
+
+// CHECK: insb # encoding: [0x6c]
+// CHECK: insb
+	insb
+	insb	%dx, %es:(%rdi)
+
+// CHECK: insw # encoding: [0x66,0x6d]
+// CHECK: insw
+	insw
+	insw	%dx, %es:(%rdi)
+
+// CHECK: insl # encoding: [0x6d]
+// CHECK: insl
+	insl
+	insl	%dx, %es:(%rdi)
+
+// CHECK: movsb # encoding: [0xa4]
+// CHECK: movsb
+// CHECK: movsb
+	movsb
+	movsb	%ds:(%rsi), %es:(%rdi)
+	movsb	(%rsi), %es:(%rdi)
+
+// CHECK: movsw # encoding: [0x66,0xa5]
+// CHECK: movsw
+// CHECK: movsw
+	movsw
+	movsw	%ds:(%rsi), %es:(%rdi)
+	movsw	(%rsi), %es:(%rdi)
+
+// CHECK: movsd # encoding: [0xa5]
+// CHECK: movsd
+// CHECK: movsd
+	movsl
+	movsl	%ds:(%rsi), %es:(%rdi)
+	movsl	(%rsi), %es:(%rdi)
+
+// CHECK: movsq # encoding: [0x48,0xa5]
+// CHECK: movsq
+// CHECK: movsq
+	movsq
+	movsq	%ds:(%rsi), %es:(%rdi)
+	movsq	(%rsi), %es:(%rdi)
+
+// CHECK: lodsb # encoding: [0xac]
+// CHECK: lodsb
+// CHECK: lodsb
+// CHECK: lodsb
+// CHECK: lodsb
+	lodsb
+	lodsb	%ds:(%rsi), %al
+	lodsb	(%rsi), %al
+	lods	%ds:(%rsi), %al
+	lods	(%rsi), %al
+
+// CHECK: lodsw # encoding: [0x66,0xad]
+// CHECK: lodsw
+// CHECK: lodsw
+// CHECK: lodsw
+// CHECK: lodsw
+	lodsw
+	lodsw	%ds:(%rsi), %ax
+	lodsw	(%rsi), %ax
+	lods	%ds:(%rsi), %ax
+	lods	(%rsi), %ax
+
+// CHECK: lodsl # encoding: [0xad]
+// CHECK: lodsl
+// CHECK: lodsl
+// CHECK: lodsl
+// CHECK: lodsl
+	lodsl
+	lodsl	%ds:(%rsi), %eax
+	lodsl	(%rsi), %eax
+	lods	%ds:(%rsi), %eax
+	lods	(%rsi), %eax
+
+// CHECK: lodsq # encoding: [0x48,0xad]
+// CHECK: lodsq
+// CHECK: lodsq
+// CHECK: lodsq
+// CHECK: lodsq
+	lodsq
+	lodsq	%ds:(%rsi), %rax
+	lodsq	(%rsi), %rax
+	lods	%ds:(%rsi), %rax
+	lods	(%rsi), %rax
+
+// CHECK: stosb # encoding: [0xaa]
+// CHECK: stosb
+// CHECK: stosb
+	stosb
+	stosb	%al, %es:(%rdi)
+	stos	%al, %es:(%rdi)
+
+// CHECK: stosw # encoding: [0x66,0xab]
+// CHECK: stosw
+// CHECK: stosw
+	stosw
+	stosw	%ax, %es:(%rdi)
+	stos	%ax, %es:(%rdi)
+
+// CHECK: stosl # encoding: [0xab]
+// CHECK: stosl
+// CHECK: stosl
+	stosl
+	stosl	%eax, %es:(%rdi)
+	stos	%eax, %es:(%rdi)
+
+// CHECK: stosq # encoding: [0x48,0xab]
+// CHECK: stosq
+// CHECK: stosq
+	stosq
+	stosq	%rax, %es:(%rdi)
+	stos	%rax, %es:(%rdi)
+
+// CHECK: strw
+// CHECK: encoding: [0x66,0x0f,0x00,0xc8]
+	str %ax
+
+// CHECK: strl
+// CHECK: encoding: [0x0f,0x00,0xc8]
+	str %eax
+
+// CHECK: strw
+// CHECK: encoding: [0x66,0x0f,0x00,0xc8]
+	str %ax
+
+// CHECK: strq
+// CHECK: encoding: [0x48,0x0f,0x00,0xc8]
+	str %rax
diff --git a/test/MC/X86/x86_64-encoding.s b/test/MC/X86/x86_64-encoding.s
index 756da4d..cfdf87f3 100644
--- a/test/MC/X86/x86_64-encoding.s
+++ b/test/MC/X86/x86_64-encoding.s
@@ -155,3 +155,19 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK: leave
 // CHECK:  encoding: [0xc9]
         	leaveq
+
+// CHECK: flds	(%edi)
+// CHECK:  encoding: [0x67,0xd9,0x07]
+        	flds	(%edi)
+
+// CHECK: filds	(%edi)
+// CHECK:  encoding: [0x67,0xdf,0x07]
+        	filds	(%edi)
+
+// CHECK: flds	(%rdi)
+// CHECK:  encoding: [0xd9,0x07]
+        	flds	(%rdi)
+
+// CHECK: filds	(%rdi)
+// CHECK:  encoding: [0xdf,0x07]
+        	filds	(%rdi)
diff --git a/test/Makefile b/test/Makefile
index b37bbfc..0d84186 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -173,22 +173,24 @@ site.exp: FORCE
 
 lit.site.cfg: site.exp
 	@echo "Making LLVM 'lit.site.cfg' file..."
-	@sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
-	     -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
-	     -e "s#@LLVM_TOOLS_DIR@#$(ToolDir)#g" \
-	     -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
-	     -e "s#@ENABLE_SHARED@#$(ENABLE_SHARED)#g" \
-	     $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
+	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp
+	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp
+	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
+	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
+	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
+	@-rm -f lit.tmp
 
 Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
 	@echo "Making LLVM unittest 'lit.site.cfg' file..."
-	@sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
-	     -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
-	     -e "s#@LLVM_TOOLS_DIR@#$(ToolDir)#g" \
-	     -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
-	     -e "s#@LLVM_BUILD_MODE@#$(BuildMode)#g" \
-	     -e "s#@ENABLE_SHARED@#$(ENABLE_SHARED)#g" \
-	     -e "s#@SHLIBDIR@#$(SharedLibDir)#g" \
-	     -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \
-	     $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
-
+	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > unit.tmp
+	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> unit.tmp
+	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> unit.tmp
+	@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> unit.tmp
+	@$(ECHOPATH) s=@LLVM_BUILD_MODE@=$(BuildMode)=g >> unit.tmp
+	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> unit.tmp
+	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> unit.tmp
+	@$(ECHOPATH) s=@SHLIBPATH_VAR@=$(SHLIBPATH_VAR)=g >> unit.tmp
+	@sed -f unit.tmp $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
+	@-rm -f unit.tmp
diff --git a/test/TableGen/TargetInstrInfo.td b/test/TableGen/TargetInstrInfo.td
index 146ef6f..6c39d5c 100644
--- a/test/TableGen/TargetInstrInfo.td
+++ b/test/TableGen/TargetInstrInfo.td
@@ -110,7 +110,7 @@ def SHL32rCL : Inst<(ops R32:$dst, R32:$src),
                   [(set R32:$dst, (shl R32:$src, CL))]>;
 
 // The RTL list is a list, allowing complex instructions to be defined easily.
-// Temporary 'internal' registers can be used to break instructions appart.
+// Temporary 'internal' registers can be used to break instructions apart.
 let isTwoAddress = 1 in
 def XOR32mi : Inst<(ops addr:$addr, imm32:$imm),
                    "xor $dst, $src2", 0x81, MRM6m,
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index 3b1fca3..ebf10f0 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -14,13 +14,14 @@ entry:
   br i1 %1, label %T, label %trap
 
 ; CHECK: entry:
-; HECK-NEXT: ret i32 4
+; CHECK-NEXT: br label %T
 
 trap:                                             ; preds = %0, %entry
   tail call void @llvm.trap() noreturn nounwind
   unreachable
 
 T:
+; CHECK: ret i32 4
   ret i32 4
 }
 
diff --git a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
index d9cd674..b957220 100644
--- a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
+++ b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
@@ -1,4 +1,4 @@
-; This bug has to do with the fact that constant propogation was implemented in
+; This bug has to do with the fact that constant propagation was implemented in
 ; terms of _logical_ not (! in C) instead of _bitwise_ not (~ in C).  This was
 ; due to a spec change.
 
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index df57fb6..d0d0a5b 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -constprop -die -S | FileCheck %s
 
-; This is a basic sanity check for constant propogation.  The add instruction 
+; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 define i32 @test1(i1 %B) {
         br i1 %B, label %BB1, label %BB2
diff --git a/test/Transforms/ConstProp/logicaltest.ll b/test/Transforms/ConstProp/logicaltest.ll
index c74296a..abd3275 100644
--- a/test/Transforms/ConstProp/logicaltest.ll
+++ b/test/Transforms/ConstProp/logicaltest.ll
@@ -1,4 +1,4 @@
-; Ensure constant propogation of logical instructions is working correctly.
+; Ensure constant propagation of logical instructions is working correctly.
 
 ; RUN: opt < %s -constprop -die -S | FileCheck %s
 ; CHECK-NOT:     {{and|or|xor}}
diff --git a/test/Transforms/ConstProp/overflow-ops.ll b/test/Transforms/ConstProp/overflow-ops.ll
index 5587e9b..d1cc2eb 100644
--- a/test/Transforms/ConstProp/overflow-ops.ll
+++ b/test/Transforms/ConstProp/overflow-ops.ll
@@ -2,6 +2,14 @@
 
 %i8i1 = type {i8, i1}
 
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8)
+
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
+
 ;;-----------------------------
 ;; uadd
 ;;-----------------------------
@@ -47,6 +55,28 @@ entry:
 }
 
 ;;-----------------------------
+;; umul
+;;-----------------------------
+
+define {i8, i1} @umul_1() nounwind {
+entry:
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 3)
+  ret {i8, i1} %t
+
+; CHECK: @umul_1
+; CHECK: ret %i8i1 { i8 44, i1 true }
+}
+
+define {i8, i1} @umul_2() nounwind {
+entry:
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 2)
+  ret {i8, i1} %t
+
+; CHECK: @umul_2
+; CHECK: ret %i8i1 { i8 -56, i1 false }
+}
+
+;;-----------------------------
 ;; sadd
 ;;-----------------------------
 
@@ -163,14 +193,9 @@ entry:
 ; CHECK: ret %i8i1 { i8 -10, i1 false }
 }
 
-
-
-declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
-declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
-
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
-declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
-declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
+;;-----------------------------
+;; smul
+;;-----------------------------
 
 ; rdar://8501501
 define {i8, i1} @smul_1() nounwind {
diff --git a/test/Transforms/ConstProp/phi.ll b/test/Transforms/ConstProp/phi.ll
index 3d9e284..c65d34c 100644
--- a/test/Transforms/ConstProp/phi.ll
+++ b/test/Transforms/ConstProp/phi.ll
@@ -1,4 +1,4 @@
-; This is a basic sanity check for constant propogation.  The add instruction 
+; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 
 ; RUN: opt < %s -constprop -die -S | not grep phi
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index 5a80aba..8409261 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -37,3 +37,16 @@ entry:
   call void @f(i32 %tmp)
   ret void
 }
+
+; Check that callers are not transformed for weak definitions.
+define weak i32 @weak_f(i32 %x) nounwind {
+entry:
+  ret i32 0
+}
+define void @weak_f_caller() nounwind {
+entry:
+; CHECK: call i32 @weak_f(i32 10)
+  %call = tail call i32 @weak_f(i32 10)
+  ret void
+}
+
diff --git a/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll b/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
new file mode 100644
index 0000000..079eec4
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; PR9561
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.8"
+
+@A = external global [0 x i32]
+
+declare cc10 void @Func2(i32*, i32*, i32*, i32)
+
+define cc10 void @Func1(i32* noalias %Arg1, i32* noalias %Arg2, i32* %Arg3, i32 %Arg4) {
+entry:
+  store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
+; CHECK: store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
+  %ln2gz = getelementptr i32* %Arg1, i32 14
+  %ln2gA = bitcast i32* %ln2gz to double*
+  %ln2gB = load double* %ln2gA
+  %ln2gD = getelementptr i32* %Arg2, i32 -3
+  %ln2gE = bitcast i32* %ln2gD to double*
+  store double %ln2gB, double* %ln2gE
+; CHECK: store double %ln2gB, double* %ln2gE
+  tail call cc10 void @Func2(i32* %Arg1, i32* %Arg2, i32* %Arg3, i32 %Arg4) nounwind
+  ret void
+}
diff --git a/test/Transforms/GVN/invariant-simple.ll b/test/Transforms/GVN/invariant-simple.ll
deleted file mode 100644
index 98ea48c..0000000
--- a/test/Transforms/GVN/invariant-simple.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-
-define i8 @test(i8* %P) nounwind {
-; CHECK: @test
-; CHECK-NOT: load
-; CHECK: ret i8
-entry:
-  store i8 1, i8* %P
-  %0 = call {}* @llvm.invariant.start(i64 32, i8* %P)
-  %1 = tail call i32 @foo(i8* %P)
-  call void @llvm.invariant.end({}* %0, i64 32, i8* %P)
-  %2 = load i8* %P
-  ret i8 %2
-}
-
-define i8 @test2(i8* %P) nounwind {
-; CHECK: @test2
-; CHECK: store i8 1
-; CHECK: store i8 2
-; CHECK: ret i8 0
-entry:
-  store i8 1, i8* %P
-  %0 = call {}* @llvm.invariant.start(i64 32, i8* %P)
-  %1 = tail call i32 @bar(i8* %P)
-  call void @llvm.invariant.end({}* %0, i64 32, i8* %P)
-  store i8 2, i8* %P
-  ret i8 0
-}
-
-declare i32 @foo(i8*) nounwind 
-declare i32 @bar(i8*) nounwind readonly
-declare {}* @llvm.invariant.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.invariant.end({}* %S, i64 %SS, i8* nocapture %P)
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 2e43321..4ff5bec 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s
 
 ; 32-bit little endian target.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
@@ -544,3 +544,99 @@ entry:
 ; CHECK: ret i32 0
 }
 
+
+;;===----------------------------------------------------------------------===;;
+;; Load -> Load forwarding in partial alias case.
+;;===----------------------------------------------------------------------===;;
+
+define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
+entry:
+  %0 = bitcast i8* %P to i32*
+  %tmp2 = load i32* %0
+  %add.ptr = getelementptr inbounds i8* %P, i64 1
+  %tmp5 = load i8* %add.ptr
+  %conv = zext i8 %tmp5 to i32
+  %add = add nsw i32 %tmp2, %conv
+  ret i32 %add
+
+; CHECK: @load_load_partial_alias
+; CHECK: load i32*
+; CHECK-NOT: load
+; CHECK: lshr i32 {{.*}}, 8
+; CHECK-NOT: load
+; CHECK: trunc i32 {{.*}} to i8
+; CHECK-NOT: load
+; CHECK: ret i32
+}
+
+
+; Cross block partial alias case.
+define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
+entry:
+  %xx = bitcast i8* %P to i32*
+  %x1 = load i32* %xx, align 4
+  %cmp = icmp eq i32 %x1, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %P, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  ret i32 %conv6
+
+if.end:
+  ret i32 52
+; CHECK: @load_load_partial_alias_cross_block
+; CHECK: land.lhs.true:
+; CHECK-NOT: load i8
+; CHECK: ret i32 %conv6
+}
+
+
+;;===----------------------------------------------------------------------===;;
+;; Load Widening
+;;===----------------------------------------------------------------------===;;
+
+%widening1 = type { i32, i8, i8, i8, i8 }
+
+@f = global %widening1 zeroinitializer, align 4
+
+define i32 @test_widening1(i8* %P) nounwind ssp noredzone {
+entry:
+  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening1
+; CHECK-NOT: load
+; CHECK: load i16*
+; CHECK-NOT: load
+; CHECK-ret i32
+}
+
+define i32 @test_widening2() nounwind ssp noredzone {
+entry:
+  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+
+  %tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
+  %conv3 = zext i8 %tmp2 to i32
+  %add2 = add nsw i32 %add, %conv3
+
+  %tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
+  %conv4 = zext i8 %tmp3 to i32
+  %add3 = add nsw i32 %add2, %conv3
+
+  ret i32 %add3
+; CHECK: @test_widening2
+; CHECK-NOT: load
+; CHECK: load i32*
+; CHECK-NOT: load
+; CHECK-ret i32
+}
+
diff --git a/test/Transforms/GlobalOpt/2011-04-09-EmptyGlobalCtors.ll b/test/Transforms/GlobalOpt/2011-04-09-EmptyGlobalCtors.ll
new file mode 100644
index 0000000..321a487
--- /dev/null
+++ b/test/Transforms/GlobalOpt/2011-04-09-EmptyGlobalCtors.ll
@@ -0,0 +1,5 @@
+; RUN: opt < %s -globalopt -disable-output
+
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [0 x %0] zeroinitializer
+
diff --git a/test/Transforms/GlobalOpt/cxx-dtor.ll b/test/Transforms/GlobalOpt/cxx-dtor.ll
new file mode 100644
index 0000000..2263562
--- /dev/null
+++ b/test/Transforms/GlobalOpt/cxx-dtor.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+%0 = type { i32, void ()* }
+%struct.A = type { i8 }
+
+@a = global %struct.A zeroinitializer, align 1
+@__dso_handle = external global i8*
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }]
+
+; CHECK-NOT: call i32 @__cxa_atexit
+
+define internal void @__cxx_global_var_init() nounwind section "__TEXT,__StaticInit,regular,pure_instructions" {
+  %1 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.A*)* @_ZN1AD1Ev to void (i8*)*), i8* getelementptr inbounds (%struct.A* @a, i32 0, i32 0), i8* bitcast (i8** @__dso_handle to i8*))
+  ret void
+}
+
+define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind align 2 {
+  call void @_ZN1AD2Ev(%struct.A* %this)
+  ret void
+}
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+
+define linkonce_odr void @_ZN1AD2Ev(%struct.A* %this) nounwind align 2 {
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() nounwind section "__TEXT,__StaticInit,regular,pure_instructions" {
+  call void @__cxx_global_var_init()
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
index 37ad63a..0f6267b 100644
--- a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S | not grep {sext}
 ; ModuleID = '<stdin>'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 target triple = "x86_64-apple-darwin9.6"
 @a = external global i32*		; <i32**> [#uses=3]
 @b = external global i32*		; <i32**> [#uses=3]
diff --git a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
index 803b540..46d6b38 100644
--- a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
@@ -13,7 +13,7 @@
 ;    d[(i+2)&15] = e[(i+2)&15]+f[(i+2)&15]+K[i+2];
 ;  }
 ;}
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 target triple = "x86_64-apple-darwin9.6"
 @a = external global i32*		; <i32**> [#uses=3]
 @b = external global i32*		; <i32**> [#uses=3]
diff --git a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
index 9fd2d2f..47164d8 100644
--- a/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
+++ b/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | grep icmp | grep next
+; RUN: opt < %s -indvars -S | FileCheck %s
 ; PR4086
 declare void @foo()
 
@@ -6,13 +6,14 @@ define void @test() {
 entry:
         br label %loop_body
 
-loop_body:              
-        %i = phi float [ %nexti, %loop_body ], [ 0.0, %entry ]          
+loop_body:
+        %i = phi float [ %nexti, %loop_body ], [ 0.0, %entry ]
         tail call void @foo()
         %nexti = fadd float %i, 1.0
-        %less = fcmp olt float %nexti, 2.0              
+        ; CHECK: icmp ne i32 %{{[a-zA-Z$._0-9]+}}, 2
+        %less = fcmp olt float %nexti, 2.0
         br i1 %less, label %loop_body, label %done
 
-done:           
+done:
         ret void
 }
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 436840a..4a07d99 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -11,7 +11,7 @@
 ; count without casting.
 
 ; ModuleID = 'ada.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
 target triple = "i686-pc-linux-gnu"
 
 define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind {
diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/IndVarSimplify/addrec-gep.ll
index 345f666..58cba60 100644
--- a/test/Transforms/IndVarSimplify/addrec-gep.ll
+++ b/test/Transforms/IndVarSimplify/addrec-gep.ll
@@ -9,7 +9,7 @@
 ; be able to reconstruct the full getelementptr, despite it having a few
 ; obstacles set in its way.
 
-target datalayout = "e-p:64:64:64"
+target datalayout = "e-p:64:64:64-n:32:64"
 
 define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/ashr-tripcount.ll b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
index baaefdc..09d559f 100644
--- a/test/Transforms/IndVarSimplify/ashr-tripcount.ll
+++ b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
@@ -4,7 +4,7 @@
 ; Indvars should be able to eliminate all of the sign extensions
 ; inside the loop.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 @pow_2_tab = external constant [0 x float]		; <[0 x float]*> [#uses=1]
 @pow_2_025_tab = external constant [0 x float]		; <[0 x float]*> [#uses=1]
 @i_pow_2_tab = external constant [0 x float]		; <[0 x float]*> [#uses=1]
diff --git a/test/Transforms/IndVarSimplify/iv-sext.ll b/test/Transforms/IndVarSimplify/iv-sext.ll
index 5516502..3e90873 100644
--- a/test/Transforms/IndVarSimplify/iv-sext.ll
+++ b/test/Transforms/IndVarSimplify/iv-sext.ll
@@ -6,7 +6,7 @@
 ; inner loop to i64.
 ; TODO: it should promote hiPart to i64 in the outer loop too.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 
 define void @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 1cc559f..80a77b6 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -2,7 +2,7 @@
 ; RUN: not grep and %t
 ; RUN: not grep zext %t
 
-target datalayout = "-p:64:64:64"
+target datalayout = "-p:64:64:64-n:32:64"
 
 define void @foo(double* %d, i64 %n) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/max-pointer.ll b/test/Transforms/IndVarSimplify/max-pointer.ll
index f18f968..46ac2d8 100644
--- a/test/Transforms/IndVarSimplify/max-pointer.ll
+++ b/test/Transforms/IndVarSimplify/max-pointer.ll
@@ -2,7 +2,7 @@
 ; RUN: grep {icmp ugt i8\\\*} %t | count 1
 ; RUN: grep {icmp sgt i8\\\*} %t | count 1
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 
 	%struct.CKenCodeCodec = type <{ i8 }>
 
diff --git a/test/Transforms/IndVarSimplify/pointer.ll b/test/Transforms/IndVarSimplify/pointer.ll
index 5eee655..d55bf98 100644
--- a/test/Transforms/IndVarSimplify/pointer.ll
+++ b/test/Transforms/IndVarSimplify/pointer.ll
@@ -9,7 +9,7 @@
 ; Indvars should be able to expand the pointer-arithmetic
 ; IV into an integer IV indexing into a simple getelementptr.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-n:32:64"
 
 define void @foo(i8* %A, i64 %n) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
index 3a5c0b6..26f05c4 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
@@ -2,7 +2,7 @@
 ; RUN: not grep inttoptr %t
 ; RUN: not grep ptrtoint %t
 ; RUN: grep scevgep %t
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
 
 ; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
 
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
index bb0993c..b41de58 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
@@ -13,7 +13,7 @@
 
 ; FIXME: This test should pass with or without TargetData. Until opt
 ; supports running tests without targetdata, just hardware this in.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 
 %struct.Q = type { [10 x %struct.N] }
 %struct.N = type { %struct.S }
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
index e17368b..ca0c399 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S \
 ; RUN:   | grep {\[%\]p.2.ip.1 = getelementptr \\\[3 x \\\[3 x double\\\]\\\]\\* \[%\]p, i64 2, i64 \[%\]tmp, i64 1}
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
 
 ; Indvars shouldn't expand this to
 ;   %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
diff --git a/test/Transforms/IndVarSimplify/preserve-gep.ll b/test/Transforms/IndVarSimplify/preserve-gep.ll
index a27d20d..82eda03 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep.ll
@@ -6,7 +6,7 @@
 ; Indvars shouldn't leave getelementptrs expanded out as
 ; inttoptr+ptrtoint in its output in common cases.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 	%struct.Foo = type { i32, i32, [10 x i32], i32 }
 
diff --git a/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll b/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll
new file mode 100644
index 0000000..6a3e3e4
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR9346
+
+define i32 @test(i64 %x) nounwind {
+; CHECK: ret i32 0
+entry:
+  %or = or i64 %x, 4294967294
+  %conv = trunc i64 %or to i32
+  %rem.i = srem i32 %conv, -1
+  ret i32 %rem.i
+}
+
diff --git a/test/Transforms/InstCombine/ExtractCast.ll b/test/Transforms/InstCombine/ExtractCast.ll
new file mode 100644
index 0000000..5ebbefd
--- /dev/null
+++ b/test/Transforms/InstCombine/ExtractCast.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+; CHECK: @a
+define i32 @a(<4 x i64> %I) {
+entry:
+; CHECK-NOT: trunc <4 x i64>
+        %J = trunc <4 x i64> %I to <4 x i32>
+        %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x i64>
+; CHECK: trunc i64
+; CHECK: ret
+        ret i32 %K
+}
+
+
+; CHECK: @b
+define i32 @b(<4 x float> %I) {
+entry:
+; CHECK-NOT: fptosi <4 x float>
+        %J = fptosi <4 x float> %I to <4 x i32>
+        %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x float>
+; CHECK: fptosi float
+; CHECK: ret
+        ret i32 %K
+}
+
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index 37ec3bc..bd878b0 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -4,7 +4,7 @@
 
 ; PR1510
 
-; These are all equivelent to A^B
+; These are all equivalent to A^B
 
 define i32 @test1(i32 %a, i32 %b) {
 entry:
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index d898ea3..a888152 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -26,3 +26,12 @@ define i32 @test3(i32 %X, i32 %Y) {
 ; CHECK-NEXT: and i32 %X, %Y
 ; CHECK-NEXT: ret
 }
+
+define i1 @test4(i32 %X) {
+  %a = icmp ult i32 %X, 31
+  %b = icmp slt i32 %X, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+; CHECK: @test4
+; CHECK-NEXT: ret i1 false
+}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
new file mode 100644
index 0000000..f6892fc
--- /dev/null
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+
+declare i8* @foo(i8*, i32, i64, i64) nounwind
+
+define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp {
+entry:
+  %__dest.addr = alloca i8*, align 8
+  %__val.addr = alloca i32, align 4
+  %__len.addr = alloca i64, align 8
+  store i8* %__dest, i8** %__dest.addr, align 8, !tbaa !1
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.value
+  call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0), !dbg !16
+  store i32 %__val, i32* %__val.addr, align 4, !tbaa !17
+  call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7), !dbg !18
+  store i64 %__len, i64* %__len.addr, align 8, !tbaa !19
+  call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9), !dbg !20
+  %tmp = load i8** %__dest.addr, align 8, !dbg !21, !tbaa !13
+  %tmp1 = load i32* %__val.addr, align 4, !dbg !21, !tbaa !17
+  %tmp2 = load i64* %__len.addr, align 8, !dbg !21, !tbaa !19
+  %tmp3 = load i8** %__dest.addr, align 8, !dbg !21, !tbaa !13
+  %0 = call i64 @llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21
+  %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
+  ret i8* %call, !dbg !21
+}
+
+!llvm.dbg.lv.foobar = !{!0, !7, !9}
+!llvm.dbg.sp = !{!1}
+
+!0 = metadata !{i32 590081, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 589865, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 589839, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 590081, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 589860, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 590081, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
+!11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !"any pointer", metadata !14}
+!14 = metadata !{metadata !"omnipotent char", metadata !15}
+!15 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!16 = metadata !{i32 78, i32 28, metadata !1, null}
+!17 = metadata !{metadata !"int", metadata !14}
+!18 = metadata !{i32 78, i32 40, metadata !1, null}
+!19 = metadata !{metadata !"long", metadata !14}
+!20 = metadata !{i32 78, i32 54, metadata !1, null}
+!21 = metadata !{i32 80, i32 3, metadata !22, null}
+!22 = metadata !{i32 589835, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 589835, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 0d13980..2e24f19 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -1,34 +1,44 @@
 ; This test makes sure that div instructions are properly eliminated.
 
-; RUN: opt < %s -instcombine -S | not grep div
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
         %B = sdiv i32 %A, 1             ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: @test1
+; CHECK-NEXT: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
         ; => Shift
         %B = udiv i32 %A, 8             ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: @test2
+; CHECK-NEXT: lshr i32 %A, 3
 }
 
 define i32 @test3(i32 %A) {
         ; => 0, don't need to keep traps
         %B = sdiv i32 0, %A             ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: @test3
+; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test4(i32 %A) {
         ; 0-A
         %B = sdiv i32 %A, -1            ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: @test4
+; CHECK-NEXT: sub i32 0, %A
 }
 
 define i32 @test5(i32 %A) {
         %B = udiv i32 %A, -16           ; <i32> [#uses=1]
         %C = udiv i32 %B, -4            ; <i32> [#uses=1]
         ret i32 %C
+; CHECK: @test5
+; CHECK-NEXT: ret i32 0
 }
 
 define i1 @test6(i32 %A) {
@@ -36,6 +46,8 @@ define i1 @test6(i32 %A) {
         ; A < 123
         %C = icmp eq i32 %B, 0          ; <i1> [#uses=1]
         ret i1 %C
+; CHECK: @test6
+; CHECK-NEXT: icmp ult i32 %A, 123
 }
 
 define i1 @test7(i32 %A) {
@@ -43,6 +55,9 @@ define i1 @test7(i32 %A) {
         ; A >= 20 && A < 30
         %C = icmp eq i32 %B, 2          ; <i1> [#uses=1]
         ret i1 %C
+; CHECK: @test7
+; CHECK-NEXT: add i32 %A, -20
+; CHECK-NEXT: icmp ult i32
 }
 
 define i1 @test8(i8 %A) {
@@ -50,6 +65,8 @@ define i1 @test8(i8 %A) {
         ; A >= 246
         %C = icmp eq i8 %B, 2           ; <i1> [#uses=1]
         ret i1 %C
+; CHECK: @test8
+; CHECK-NEXT: icmp ugt i8 %A, -11
 }
 
 define i1 @test9(i8 %A) {
@@ -57,28 +74,47 @@ define i1 @test9(i8 %A) {
         ; A < 246
         %C = icmp ne i8 %B, 2           ; <i1> [#uses=1]
         ret i1 %C
+; CHECK: @test9
+; CHECK-NEXT: icmp ult i8 %A, -10
 }
 
 define i32 @test10(i32 %X, i1 %C) {
         %V = select i1 %C, i32 64, i32 8                ; <i32> [#uses=1]
         %R = udiv i32 %X, %V            ; <i32> [#uses=1]
         ret i32 %R
+; CHECK: @test10
+; CHECK-NEXT: select i1 %C, i32 6, i32 3
+; CHECK-NEXT: lshr i32 %X
 }
 
 define i32 @test11(i32 %X, i1 %C) {
         %A = select i1 %C, i32 1024, i32 32             ; <i32> [#uses=1]
         %B = udiv i32 %X, %A            ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: @test11
+; CHECK-NEXT: select i1 %C, i32 10, i32 5
+; CHECK-NEXT: lshr i32 %X
 }
 
 ; PR2328
 define i32 @test12(i32 %x) nounwind  {
 	%tmp3 = udiv i32 %x, %x		; 1
 	ret i32 %tmp3
+; CHECK: @test12
+; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test13(i32 %x) nounwind  {
 	%tmp3 = sdiv i32 %x, %x		; 1
 	ret i32 %tmp3
+; CHECK: @test13
+; CHECK-NEXT: ret i32 1
 }
 
+define i32 @test14(i8 %x) nounwind {
+	%zext = zext i8 %x to i32
+	%div = udiv i32 %zext, 257	; 0
+	ret i32 %div
+; CHECK: @test14
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
new file mode 100644
index 0000000..2eb4f05
--- /dev/null
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @test1(float %x, float %y) nounwind {
+  %ext1 = fpext float %x to double
+  %ext2 = fpext float %y to double
+  %cmp = fcmp ogt double %ext1, %ext2
+  ret i1 %cmp
+; CHECK: @test1
+; CHECK-NEXT: fcmp ogt float %x, %y
+}
+
+define i1 @test2(float %a) nounwind {
+  %ext = fpext float %a to double
+  %cmp = fcmp ogt double %ext, 1.000000e+00
+  ret i1 %cmp
+; CHECK: @test2
+; CHECK-NEXT: fcmp ogt float %a, 1.0
+}
+
+define i1 @test3(float %a) nounwind {
+  %ext = fpext float %a to double
+  %cmp = fcmp ogt double %ext, 0x3FF0000000000001 ; more precision than float.
+  ret i1 %cmp
+; CHECK: @test3
+; CHECK-NEXT: fpext float %a to double
+}
+
+define i1 @test4(float %a) nounwind {
+  %ext = fpext float %a to double
+  %cmp = fcmp ogt double %ext, 0x36A0000000000000 ; denormal in float.
+  ret i1 %cmp
+; CHECK: @test4
+; CHECK-NEXT: fpext float %a to double
+}
+
+define i1 @test5(float %a) nounwind {
+  %neg = fsub float -0.000000e+00, %a
+  %cmp = fcmp ogt float %neg, 1.000000e+00
+  ret i1 %cmp
+; CHECK: @test5
+; CHECK-NEXT: fcmp olt float %a, -1.0
+}
+
+define i1 @test6(float %x, float %y) nounwind {
+  %neg1 = fsub float -0.000000e+00, %x
+  %neg2 = fsub float -0.000000e+00, %y
+  %cmp = fcmp olt float %neg1, %neg2
+  ret i1 %cmp
+; CHECK: @test6
+; CHECK-NEXT: fcmp ogt float %x, %y
+}
+
+define i1 @test7(float %x) nounwind readnone ssp noredzone {
+  %ext = fpext float %x to ppc_fp128
+  %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
+  ret i1 %cmp
+; Can't convert ppc_fp128
+; CHECK: @test7
+; CHECK-NEXT: fpext float %x to ppc_fp128
+}
diff --git a/test/Transforms/InstCombine/fdiv.ll b/test/Transforms/InstCombine/fdiv.ll
new file mode 100644
index 0000000..a2cce01
--- /dev/null
+++ b/test/Transforms/InstCombine/fdiv.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define float @test1(float %x) nounwind readnone ssp {
+  %div = fdiv float %x, 0x3810000000000000
+  ret float %div
+
+; CHECK: @test1
+; CHECK-NEXT: fmul float %x, 0x47D0000000000000
+}
+
+define float @test2(float %x) nounwind readnone ssp {
+  %div = fdiv float %x, 0x47E0000000000000
+  ret float %div
+
+; CHECK: @test2
+; CHECK-NEXT: fdiv float %x, 0x47E0000000000000
+}
+
+define float @test3(float %x) nounwind readnone ssp {
+  %div = fdiv float %x, 0x36A0000000000000
+  ret float %div
+
+; CHECK: @test3
+; CHECK-NEXT: fdiv float %x, 0x36A0000000000000
+}
diff --git a/test/Transforms/InstCombine/fold-bin-operand.ll b/test/Transforms/InstCombine/fold-bin-operand.ll
index d0d072a..a8bad0d 100644
--- a/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -1,14 +1,17 @@
-; RUN: opt < %s -instcombine -S | not grep icmp
+; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define i1 @f(i1 %x) {
+; CHECK: @f
+; CHECK: ret i1 false
 	%b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
 	ret i1 %b
 }
 
-; FIXME: This doesn't fold at the moment!
-; define i32 @f(i32 %x) {
-;	%b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
-;	ret i32 %b
-;}
+define i32 @g(i32 %x) {
+; CHECK: @g
+; CHECK: ret i32 %x
+	%b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
+	ret i32 %b
+}
 
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
new file mode 100644
index 0000000..dfe12db
--- /dev/null
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-win32"
+
+%myStruct = type { float, [3 x float], [4 x float], i32 }
+
+; make sure that we are not crashing when creating an illegal type
+define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
+ST:
+  %A = getelementptr inbounds %myStruct addrspace(1)* %p, i64 0
+  %B = bitcast %myStruct addrspace(1)* %A to %myStruct*
+  %C = getelementptr inbounds %myStruct* %B, i32 0, i32 1
+  %D = getelementptr inbounds [3 x float]* %C, i32 0, i32 2
+  %E = load float* %D, align 4
+  %F = fsub float %E, undef
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 3150883..099540a 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -377,3 +377,136 @@ define i1 @test38(i32 %x, i32 %y, i32 %z) {
   %c = icmp ugt i32 %lhs, %rhs
   ret i1 %c
 }
+
+; PR9343 #1
+; CHECK: @test39
+; CHECK: %B = icmp eq i32 %X, 0
+define i1 @test39(i32 %X, i32 %Y) {
+  %A = ashr exact i32 %X, %Y
+  %B = icmp eq i32 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @test40
+; CHECK: %B = icmp ne i32 %X, 0
+define i1 @test40(i32 %X, i32 %Y) {
+  %A = lshr exact i32 %X, %Y
+  %B = icmp ne i32 %A, 0
+  ret i1 %B
+}
+
+; PR9343 #3
+; CHECK: @test41
+; CHECK: ret i1 true
+define i1 @test41(i32 %X, i32 %Y) {
+  %A = urem i32 %X, %Y
+  %B = icmp ugt i32 %Y, %A
+  ret i1 %B
+}
+
+; CHECK: @test42
+; CHECK: %B = icmp sgt i32 %Y, -1
+define i1 @test42(i32 %X, i32 %Y) {
+  %A = srem i32 %X, %Y
+  %B = icmp slt i32 %A, %Y
+  ret i1 %B
+}
+
+; CHECK: @test43
+; CHECK: %B = icmp slt i32 %Y, 0
+define i1 @test43(i32 %X, i32 %Y) {
+  %A = srem i32 %X, %Y
+  %B = icmp slt i32 %Y, %A
+  ret i1 %B
+}
+
+; CHECK: @test44
+; CHECK: %B = icmp sgt i32 %Y, -1
+define i1 @test44(i32 %X, i32 %Y) {
+  %A = srem i32 %X, %Y
+  %B = icmp slt i32 %A, %Y
+  ret i1 %B
+}
+
+; CHECK: @test45
+; CHECK: %B = icmp slt i32 %Y, 0
+define i1 @test45(i32 %X, i32 %Y) {
+  %A = srem i32 %X, %Y
+  %B = icmp slt i32 %Y, %A
+  ret i1 %B
+}
+
+; PR9343 #4
+; CHECK: @test46
+; CHECK: %C = icmp ult i32 %X, %Y
+define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
+  %A = ashr exact i32 %X, %Z
+  %B = ashr exact i32 %Y, %Z
+  %C = icmp ult i32 %A, %B
+  ret i1 %C
+}
+
+; PR9343 #5
+; CHECK: @test47
+; CHECK: %C = icmp ugt i32 %X, %Y
+define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
+  %A = ashr exact i32 %X, %Z
+  %B = ashr exact i32 %Y, %Z
+  %C = icmp ugt i32 %A, %B
+  ret i1 %C
+}
+
+; PR9343 #8
+; CHECK: @test48
+; CHECK: %C = icmp eq i32 %X, %Y
+define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
+  %A = sdiv exact i32 %X, %Z
+  %B = sdiv exact i32 %Y, %Z
+  %C = icmp eq i32 %A, %B
+  ret i1 %C
+}
+
+; PR8469
+; CHECK: @test49
+; CHECK: ret <2 x i1> <i1 true, i1 true>
+define <2 x i1> @test49(<2 x i32> %tmp3) {
+entry:
+  %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
+  %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
+  ret <2 x i1> %cmp  
+}
+
+; PR9343 #7
+; CHECK: @test50
+; CHECK: ret i1 true
+define i1 @test50(i16 %X, i32 %Y) {
+  %A = zext i16 %X to i32
+  %B = srem i32 %A, %Y
+  %C = icmp sgt i32 %B, -1
+  ret i1 %C
+}
+
+; CHECK: @test51
+; CHECK: ret i1 %C
+define i1 @test51(i32 %X, i32 %Y) {
+  %A = and i32 %X, 2147483648
+  %B = srem i32 %A, %Y
+  %C = icmp sgt i32 %B, -1
+  ret i1 %C
+}
+
+; CHECK: @test52
+; CHECK-NEXT: and i32 %x1, 16711935
+; CHECK-NEXT: icmp eq i32 {{.*}}, 4980863
+; CHECK-NEXT: ret i1
+define i1 @test52(i32 %x1) nounwind {
+  %conv = and i32 %x1, 255
+  %cmp = icmp eq i32 %conv, 127
+  %tmp2 = lshr i32 %x1, 16
+  %tmp3 = trunc i32 %tmp2 to i8
+  %cmp15 = icmp eq i8 %tmp3, 76
+
+  %A = and i1 %cmp, %cmp15
+  ret i1 %A
+}
+
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 50e7f1f..332cd46 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -112,6 +112,33 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
 ; CHECK-NEXT: ret i8 %A
 }
 
+%ov.result.32 = type { i32, i1 }
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+define i32 @umultest3(i32 %n) nounwind {
+  %shr = lshr i32 %n, 2
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
+  %ov = extractvalue %ov.result.32 %mul, 1
+  %res = extractvalue %ov.result.32 %mul, 0
+  %ret = select i1 %ov, i32 -1, i32 %res
+  ret i32 %ret
+; CHECK: @umultest3
+; CHECK-NEXT: shr
+; CHECK-NEXT: mul nuw
+; CHECK-NEXT: ret
+}
+
+define i32 @umultest4(i32 %n) nounwind {
+  %shr = lshr i32 %n, 1
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 4)
+  %ov = extractvalue %ov.result.32 %mul, 1
+  %res = extractvalue %ov.result.32 %mul, 0
+  %ret = select i1 %ov, i32 -1, i32 %res
+  ret i32 %ret
+; CHECK: @umultest4
+; CHECK: umul.with.overflow
+}
+
 define void @powi(double %V, double *%P) {
 entry:
   %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
diff --git a/test/Transforms/InstCombine/merge-icmp.ll b/test/Transforms/InstCombine/merge-icmp.ll
new file mode 100644
index 0000000..00020b1
--- /dev/null
+++ b/test/Transforms/InstCombine/merge-icmp.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @test1(i16* %x) {
+  %load = load i16* %x, align 4
+  %trunc = trunc i16 %load to i8
+  %cmp1 = icmp eq i8 %trunc, 127
+  %and = and i16 %load, -256
+  %cmp2 = icmp eq i16 %and, 17664
+  %or = and i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK: @test1
+; CHECK-NEXT: load i16
+; CHECK-NEXT: icmp eq i16 %load, 17791
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test2(i16* %x) {
+  %load = load i16* %x, align 4
+  %and = and i16 %load, -256
+  %cmp1 = icmp eq i16 %and, 32512
+  %trunc = trunc i16 %load to i8
+  %cmp2 = icmp eq i8 %trunc, 69
+  %or = and i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK: @test2
+; CHECK-NEXT: load i16
+; CHECK-NEXT: icmp eq i16 %load, 32581
+; CHECK-NEXT: ret i1
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index f82f9fa..94a5732 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -390,3 +390,22 @@ define i1 @test36(i32 %x) {
 ; CHECK-NEXT: ret i1
 }
 
+define i32 @test37(i32* %xp, i32 %y) {
+; CHECK: @test37
+; CHECK: select i1 %tobool, i32 -1, i32 %x
+  %tobool = icmp ne i32 %y, 0
+  %sext = sext i1 %tobool to i32
+  %x = load i32* %xp
+  %or = or i32 %sext, %x
+  ret i32 %or
+}
+
+define i32 @test38(i32* %xp, i32 %y) {
+; CHECK: @test38
+; CHECK: select i1 %tobool, i32 -1, i32 %x
+  %tobool = icmp ne i32 %y, 0
+  %sext = sext i1 %tobool to i32
+  %x = load i32* %xp
+  %or = or i32 %x, %sext
+  ret i32 %or
+}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 62c6a63..cd865ae 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -197,25 +197,25 @@ declare i1 @test11a()
 define i1 @test11() {
 entry:
   %a = alloca i32
-  %i = ptrtoint i32* %a to i32
+  %i = ptrtoint i32* %a to i64
   %b = call i1 @test11a()
   br i1 %b, label %one, label %two
 
 one:
-  %x = phi i32 [%i, %entry], [%y, %two]
+  %x = phi i64 [%i, %entry], [%y, %two]
   %c = call i1 @test11a()
   br i1 %c, label %two, label %end
 
 two:
-  %y = phi i32 [%i, %entry], [%x, %one]
+  %y = phi i64 [%i, %entry], [%x, %one]
   %d = call i1 @test11a()
   br i1 %d, label %one, label %end
 
 end:
-  %f = phi i32 [ %x, %one], [%y, %two]
+  %f = phi i64 [ %x, %one], [%y, %two]
   ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
   ; even though %f must equal %i at this point
-  %g = inttoptr i32 %f to i32*
+  %g = inttoptr i64 %f to i32*
   store i32 10, i32* %g
   %z = call i1 @test11a()
   ret i1 %z
@@ -544,3 +544,79 @@ BB2:
 ; CHECK-NEXT: %C = add nuw i32 %A, 1
 ; CHECK-NEXT: ret i32 %C
 }
+
+; Same as test11, but used to be missed due to a bug.
+declare i1 @test25a()
+
+define i1 @test25() {
+entry:
+  %a = alloca i32
+  %i = ptrtoint i32* %a to i64
+  %b = call i1 @test25a()
+  br i1 %b, label %one, label %two
+
+one:
+  %x = phi i64 [%y, %two], [%i, %entry]
+  %c = call i1 @test25a()
+  br i1 %c, label %two, label %end
+
+two:
+  %y = phi i64 [%x, %one], [%i, %entry]
+  %d = call i1 @test25a()
+  br i1 %d, label %one, label %end
+
+end:
+  %f = phi i64 [ %x, %one], [%y, %two]
+  ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+  ; even though %f must equal %i at this point
+  %g = inttoptr i64 %f to i32*
+  store i32 10, i32* %g
+  %z = call i1 @test25a()
+  ret i1 %z
+; CHECK: @test25
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
+
+declare i1 @test26a()
+
+define i1 @test26(i32 %n) {
+entry:
+  %a = alloca i32
+  %i = ptrtoint i32* %a to i64
+  %b = call i1 @test26a()
+  br label %one
+
+one:
+  %x = phi i64 [%y, %two], [%w, %three], [%i, %entry]
+  %c = call i1 @test26a()
+  switch i32 %n, label %end [
+          i32 2, label %two
+          i32 3, label %three
+  ]
+
+two:
+  %y = phi i64 [%x, %one], [%w, %three]
+  %d = call i1 @test26a()
+  switch i32 %n, label %end [
+          i32 10, label %one
+          i32 30, label %three
+  ]
+
+three:
+  %w = phi i64 [%y, %two], [%x, %one]
+  %e = call i1 @test26a()
+  br i1 %e, label %one, label %two
+
+end:
+  %f = phi i64 [ %x, %one], [%y, %two]
+  ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+  ; even though %f must equal %i at this point
+  %g = inttoptr i64 %f to i32*
+  store i32 10, i32* %g
+  %z = call i1 @test26a()
+  ret i1 %z
+; CHECK: @test26
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index ba9d99c..3925907 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -714,3 +714,38 @@ define i32 @test52(i32 %n, i32 %m) nounwind {
   ret i32 %storemerge
 }
 
+; PR9454
+define i32 @test53(i32 %x) nounwind {
+  %and = and i32 %x, 2
+  %cmp = icmp eq i32 %and, %x
+  %sel = select i1 %cmp, i32 2, i32 1
+  ret i32 %sel
+; CHECK: @test53
+; CHECK: select i1 %cmp
+; CHECK: ret
+}
+
+define i32 @test54(i32 %X, i32 %Y) {
+  %A = ashr exact i32 %X, %Y
+  %B = icmp eq i32 %A, 0
+  %C = select i1 %B, i32 %A, i32 1
+  ret i32 %C
+; CHECK: @test54
+; CHECK-NOT: ashr
+; CHECK-NOT: select
+; CHECK: icmp ne i32 %X, 0
+; CHECK: zext 
+; CHECK: ret
+}
+
+define i1 @test55(i1 %X, i32 %Y, i32 %Z) {
+  %A = ashr exact i32 %Y, %Z
+  %B = select i1 %X, i32 %Y, i32 %A
+  %C = icmp eq i32 %B, 0
+  ret i1 %C
+; CHECK: @test55
+; CHECK-NOT: ashr
+; CHECK-NOT: select
+; CHECK: icmp eq
+; CHECK: ret i1
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index 60669b7..f49a2ef 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -126,3 +126,61 @@ define void @test11(<2 x i16> %srcA, <2 x i16> %srcB, <2 x i16>* %dst) {
 ; CHECK-NEXT: store <2 x i16>
 ; CHECK-NEXT: ret
 }                                                                                                                               
+
+define i64 @test12(i32 %x) nounwind {
+  %shr = lshr i32 %x, 1
+  %sub = sub nsw i32 0, %shr
+  %conv = sext i32 %sub to i64
+  ret i64 %conv
+; CHECK: @test12
+; CHECK: sext
+; CHECK: ret
+}
+
+define i32 @test13(i32 %x) nounwind {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %ext = sext i1 %cmp to i32
+  ret i32 %ext
+; CHECK: @test13
+; CHECK-NEXT: %and = lshr i32 %x, 3
+; CHECK-NEXT: %1 = and i32 %and, 1
+; CHECK-NEXT: %sext = add i32 %1, -1
+; CHECK-NEXT: ret i32 %sext
+}
+
+define i32 @test14(i16 %x) nounwind {
+  %and = and i16 %x, 16
+  %cmp = icmp ne i16 %and, 16
+  %ext = sext i1 %cmp to i32
+  ret i32 %ext
+; CHECK: @test14
+; CHECK-NEXT: %and = lshr i16 %x, 4
+; CHECK-NEXT: %1 = and i16 %and, 1
+; CHECK-NEXT: %sext = add i16 %1, -1
+; CHECK-NEXT: %ext = sext i16 %sext to i32
+; CHECK-NEXT: ret i32 %ext
+}
+
+define i32 @test15(i32 %x) nounwind {
+  %and = and i32 %x, 16
+  %cmp = icmp ne i32 %and, 0
+  %ext = sext i1 %cmp to i32
+  ret i32 %ext
+; CHECK: @test15
+; CHECK-NEXT: %1 = shl i32 %x, 27
+; CHECK-NEXT: %sext = ashr i32 %1, 31
+; CHECK-NEXT: ret i32 %sext
+}
+
+define i32 @test16(i16 %x) nounwind {
+  %and = and i16 %x, 8
+  %cmp = icmp eq i16 %and, 8
+  %ext = sext i1 %cmp to i32
+  ret i32 %ext
+; CHECK: @test16
+; CHECK-NEXT: %1 = shl i16 %x, 12
+; CHECK-NEXT: %sext = ashr i16 %1, 15
+; CHECK-NEXT: %ext = sext i16 %sext to i32
+; CHECK-NEXT: ret i32 %ext
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 7fab1d2..bded68a 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -485,3 +485,24 @@ entry:
 ; CHECK: ret i8 %tmp551
   ret i8 %tmp55
 }
+
+; PR9809
+define i32 @test40(i32 %a, i32 %b) nounwind {
+  %shl1 = shl i32 1, %b
+  %shl2 = shl i32 %shl1, 2
+  %div = udiv i32 %a, %shl2
+  ret i32 %div
+; CHECK: @test40
+; CHECK-NEXT: add i32 %b, 2
+; CHECK-NEXT: lshr i32 %a
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test41(i32 %a, i32 %b) nounwind {
+  %1 = shl i32 1, %b
+  %2 = shl i32 %1, 3
+  ret i32 %2
+; CHECK: @test41
+; CHECK-NEXT: shl i32 8, %b
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
new file mode 100644
index 0000000..47f5f30
--- /dev/null
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @foo()
+
+define void @test1(i32 %a, i32 %b) nounwind {
+  %1 = icmp slt i32 %a, 0
+  %2 = icmp slt i32 %b, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test1
+; CHECK-NEXT: %1 = or i32 %a, %b
+; CHECK-NEXT: %2 = icmp slt i32 %1, 0
+; CHECK-NEXT: br
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test2(i32 %a, i32 %b) nounwind {
+  %1 = icmp sgt i32 %a, -1
+  %2 = icmp sgt i32 %b, -1
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test2
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %2 = icmp sgt i32 %1, -1
+; CHECK-NEXT: br
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test3(i32 %a, i32 %b) nounwind {
+  %1 = icmp slt i32 %a, 0
+  %2 = icmp slt i32 %b, 0
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test3
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %2 = icmp slt i32 %1, 0
+; CHECK-NEXT: br
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test4(i32 %a, i32 %b) nounwind {
+  %1 = icmp sgt i32 %a, -1
+  %2 = icmp sgt i32 %b, -1
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test4
+; CHECK-NEXT: %1 = or i32 %a, %b
+; CHECK-NEXT: %2 = icmp sgt i32 %1, -1
+; CHECK-NEXT: br
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcpy_chk-64.ll b/test/Transforms/InstCombine/strcpy_chk-64.ll
new file mode 100644
index 0000000..036fcbe
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy_chk-64.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @func(i8* %i) nounwind ssp {
+; CHECK: @func
+; CHECK: @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
+entry:
+  %s = alloca [32 x i8], align 16
+  %arraydecay = getelementptr inbounds [32 x i8]* %s, i32 0, i32 0
+  %call = call i8* @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
+  call void @func2(i8* %arraydecay)
+  ret void
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind
+
+declare void @func2(i8*)
diff --git a/test/Transforms/InstCombine/udivrem-change-width.ll b/test/Transforms/InstCombine/udivrem-change-width.ll
index 9983944..b388a3b0 100644
--- a/test/Transforms/InstCombine/udivrem-change-width.ll
+++ b/test/Transforms/InstCombine/udivrem-change-width.ll
@@ -1,14 +1,16 @@
-; RUN: opt < %s -instcombine -S | not grep zext
-; PR4548
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
+; PR4548
 define i8 @udiv_i8(i8 %a, i8 %b) nounwind {
   %conv = zext i8 %a to i32       
   %conv2 = zext i8 %b to i32      
   %div = udiv i32 %conv, %conv2   
   %conv3 = trunc i32 %div to i8   
   ret i8 %conv3
+; CHECK: @udiv_i8
+; CHECK: udiv i8 %a, %b
 }
 
 define i8 @urem_i8(i8 %a, i8 %b) nounwind {
@@ -17,5 +19,44 @@ define i8 @urem_i8(i8 %a, i8 %b) nounwind {
   %div = urem i32 %conv, %conv2   
   %conv3 = trunc i32 %div to i8   
   ret i8 %conv3
+; CHECK: @urem_i8
+; CHECK: urem i8 %a, %b
 }
 
+define i32 @udiv_i32(i8 %a, i8 %b) nounwind {
+  %conv = zext i8 %a to i32
+  %conv2 = zext i8 %b to i32
+  %div = udiv i32 %conv, %conv2
+  ret i32 %div
+; CHECK: @udiv_i32
+; CHECK: udiv i8 %a, %b
+; CHECK: zext
+}
+
+define i32 @urem_i32(i8 %a, i8 %b) nounwind {
+  %conv = zext i8 %a to i32
+  %conv2 = zext i8 %b to i32
+  %div = urem i32 %conv, %conv2
+  ret i32 %div
+; CHECK: @urem_i32
+; CHECK: urem i8 %a, %b
+; CHECK: zext
+}
+
+define i32 @udiv_i32_c(i8 %a) nounwind {
+  %conv = zext i8 %a to i32
+  %div = udiv i32 %conv, 10
+  ret i32 %div
+; CHECK: @udiv_i32_c
+; CHECK: udiv i8 %a, 10
+; CHECK: zext
+}
+
+define i32 @urem_i32_c(i8 %a) nounwind {
+  %conv = zext i8 %a to i32
+  %div = urem i32 %conv, 10
+  ret i32 %div
+; CHECK: @urem_i32_c
+; CHECK: urem i8 %a, 10
+; CHECK: zext
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 250e44c..d2c564f 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -137,22 +137,38 @@ define i1 @shl(i32 %x) {
 ; CHECK: ret i1 false
 }
 
-define i1 @lshr(i32 %x) {
-; CHECK: @lshr
+define i1 @lshr1(i32 %x) {
+; CHECK: @lshr1
   %s = lshr i32 -1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
-define i1 @ashr(i32 %x) {
-; CHECK: @ashr
+define i1 @lshr2(i32 %x) {
+; CHECK: @lshr2
+  %s = lshr i32 %x, 30
+  %c = icmp ugt i32 %s, 8
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @ashr1(i32 %x) {
+; CHECK: @ashr1
   %s = ashr i32 -1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
+define i1 @ashr2(i32 %x) {
+; CHECK: @ashr2
+  %s = ashr i32 %x, 30
+  %c = icmp slt i32 %s, -5
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
 define i1 @select1(i1 %cond) {
 ; CHECK: @select1
   %s = select i1 %cond, i32 1, i32 0
@@ -187,3 +203,134 @@ define i1 @select4(i1 %cond) {
   ret i1 %c
 ; CHECK: ret i1 %cond
 }
+
+define i1 @urem1(i32 %X, i32 %Y) {
+; CHECK: @urem1
+  %A = urem i32 %X, %Y
+  %B = icmp ult i32 %A, %Y
+  ret i1 %B
+; CHECK: ret i1 true
+}
+
+define i1 @urem2(i32 %X, i32 %Y) {
+; CHECK: @urem2
+  %A = urem i32 %X, %Y
+  %B = icmp eq i32 %A, %Y
+  ret i1 %B
+; CHECK: ret i1 false
+}
+
+define i1 @urem3(i32 %X) {
+; CHECK: @urem3
+  %A = urem i32 %X, 10
+  %B = icmp ult i32 %A, 15
+  ret i1 %B
+; CHECK: ret i1 true
+}
+
+define i1 @urem4(i32 %X) {
+; CHECK: @urem4
+  %A = urem i32 %X, 15
+  %B = icmp ult i32 %A, 10
+  ret i1 %B
+; CHECK: ret i1 %B
+}
+
+define i1 @urem5(i16 %X, i32 %Y) {
+; CHECK: @urem5
+  %A = zext i16 %X to i32
+  %B = urem i32 %A, %Y
+  %C = icmp slt i32 %B, %Y
+  ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @urem6(i32 %X, i32 %Y) {
+; CHECK: @urem6
+  %A = urem i32 %X, %Y
+  %B = icmp ugt i32 %Y, %A
+  ret i1 %B
+; CHECK: ret i1 true
+}
+
+define i1 @srem1(i32 %X) {
+; CHECK: @srem1
+  %A = srem i32 %X, -5
+  %B = icmp sgt i32 %A, 5
+  ret i1 %B
+; CHECK: ret i1 false
+}
+
+; PR9343 #15
+; CHECK: @srem2
+; CHECK: ret i1 false
+define i1 @srem2(i16 %X, i32 %Y) {
+  %A = zext i16 %X to i32
+  %B = add nsw i32 %A, 1
+  %C = srem i32 %B, %Y
+  %D = icmp slt i32 %C, 0
+  ret i1 %D
+}
+
+; CHECK: @srem3
+; CHECK-NEXT: ret i1 false
+define i1 @srem3(i16 %X, i32 %Y) {
+  %A = zext i16 %X to i32
+  %B = or i32 2147483648, %A
+  %C = sub nsw i32 1, %B
+  %D = srem i32 %C, %Y
+  %E = icmp slt i32 %D, 0
+  ret i1 %E
+}
+
+; CHECK: @srem4
+; CHECK-NEXT: ret i1 false
+define i1 @srem4(i16 %X, i32 %Y) {
+  %A = zext i16 %X to i32
+  %B = or i32 2147483648, %A
+  %C = sub nsw i32 %A, %B
+  %D = srem i32 %C, %Y
+  %E = icmp slt i32 %D, 0
+  ret i1 %E
+}
+
+define i1 @udiv1(i32 %X) {
+; CHECK: @udiv1
+  %A = udiv i32 %X, 1000000
+  %B = icmp ult i32 %A, 5000
+  ret i1 %B
+; CHECK: ret i1 true
+}
+
+define i1 @udiv2(i32 %X, i32 %Y, i32 %Z) {
+; CHECK: @udiv2
+  %A = udiv exact i32 10, %Z
+  %B = udiv exact i32 20, %Z
+  %C = icmp ult i32 %A, %B
+  ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @sdiv1(i32 %X) {
+; CHECK: @sdiv1
+  %A = sdiv i32 %X, 1000000
+  %B = icmp slt i32 %A, 3000
+  ret i1 %B
+; CHECK: ret i1 true
+}
+
+define i1 @or1(i32 %X) {
+; CHECK: @or1
+  %A = or i32 %X, 62
+  %B = icmp ult i32 %A, 50
+  ret i1 %B
+; CHECK: ret i1 false
+}
+
+define i1 @and1(i32 %X) {
+; CHECK: @and1
+  %A = and i32 %X, 62
+  %B = icmp ugt i32 %A, 70
+  ret i1 %B
+; CHECK: ret i1 false
+}
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
new file mode 100644
index 0000000..4c8f87c
--- /dev/null
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @select1(i32 %x, i1 %b) {
+; CHECK: @select1
+  %rhs = select i1 %b, i32 %x, i32 1
+  %rem = srem i32 %x, %rhs
+  ret i32 %rem
+; CHECK: ret i32 0
+}
+
+define i32 @select2(i32 %x, i1 %b) {
+; CHECK: @select2
+  %rhs = select i1 %b, i32 %x, i32 1
+  %rem = urem i32 %x, %rhs
+  ret i32 %rem
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/Internalize/available_externally.ll b/test/Transforms/Internalize/available_externally.ll
new file mode 100644
index 0000000..a2cf23f
--- /dev/null
+++ b/test/Transforms/Internalize/available_externally.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -internalize -internalize-public-api-list foo -S | FileCheck %s
+
+; CHECK: define void @foo
+define void @foo() {
+  ret void
+}
+
+; CHECK: define internal void @zed
+define void @zed() {
+  ret void
+}
+
+; CHECK: define available_externally void @bar
+define available_externally void @bar() {
+  ret void
+}
diff --git a/test/Transforms/JumpThreading/2011-04-02-SimplifyDeadBlock.ll b/test/Transforms/JumpThreading/2011-04-02-SimplifyDeadBlock.ll
new file mode 100644
index 0000000..76dd2d1
--- /dev/null
+++ b/test/Transforms/JumpThreading/2011-04-02-SimplifyDeadBlock.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -jump-threading
+; PR9446
+; Just check that it doesn't crash
+
+define void @int327() nounwind {
+entry:
+  unreachable
+
+for.cond:                                         ; preds = %for.cond4
+  %tobool3 = icmp eq i8 undef, 0
+  br i1 %tobool3, label %for.cond23, label %for.cond4
+
+for.cond4:                                        ; preds = %for.cond
+  br label %for.cond
+
+for.cond23:                                       ; preds = %for.body28, %for.cond23, %for.cond
+  %conv321 = phi i32 [ %conv32, %for.body28 ], [ 0, %for.cond ], [ %conv321, %for.cond23 ]
+  %l_266.0 = phi i32 [ %phitmp, %for.body28 ], [ 0, %for.cond ], [ 0, %for.cond23 ]
+  %cmp26 = icmp eq i32 %l_266.0, 0
+  br i1 %cmp26, label %for.body28, label %for.cond23
+
+for.body28:                                       ; preds = %for.cond23
+  %and = and i32 %conv321, 1
+  %conv32 = zext i8 undef to i32
+  %add = add nsw i32 %l_266.0, 1
+  %phitmp = and i32 %add, 255
+  br label %for.cond23
+
+if.end43:                                         ; No predecessors!
+  ret void
+}
+
diff --git a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
new file mode 100644
index 0000000..46aaa00
--- /dev/null
+++ b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -0,0 +1,31 @@
+; RUN: opt -jump-threading < %s
+; <rdar://problem/9284786>
+
+%0 = type <{ i64, i16, i64, i8, i8 }>
+
+@g_338 = external global %0, align 8
+
+define void @func_1() nounwind ssp {
+entry:
+  ret void
+
+for.cond1177:
+  %inc1187 = add nsw i32 0, 1
+  %cmp1179 = icmp slt i32 %inc1187, 5
+  br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
+
+land.rhs1320:
+  %tmp1324 = volatile load i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
+  br label %if.end.i
+
+if.end.i:
+  %tobool.pr.i = phi i1 [ false, %if.end.i ], [ false, %land.rhs1320 ]
+  br i1 %tobool.pr.i, label %return, label %if.end.i
+
+return:
+  ret void
+}
+
+!0 = metadata !{metadata !"long long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/JumpThreading/pr9331.ll b/test/Transforms/JumpThreading/pr9331.ll
new file mode 100644
index 0000000..4c06d52
--- /dev/null
+++ b/test/Transforms/JumpThreading/pr9331.ll
@@ -0,0 +1,50 @@
+; RUN: opt -jump-threading -S < %s
+
+define void @func(i8 zeroext %p_44) nounwind {
+entry:
+  br i1 false, label %for.cond2, label %if.end50
+
+for.cond2:                                        ; preds = %for.inc46, %lor.end, %entry
+  %p_44.addr.1 = phi i8 [ %p_44.addr.1, %lor.end ], [ %p_44, %entry ], [ %p_44.addr.1, %for.inc46 ]
+  br i1 undef, label %for.inc46, label %for.body5
+
+for.body5:                                        ; preds = %for.cond2
+  br i1 undef, label %lbl_465, label %if.then9
+
+if.then9:                                         ; preds = %for.body5
+  br label %return
+
+lbl_465:                                          ; preds = %lbl_465, %for.body5
+  %tobool19 = icmp eq i8 undef, 0
+  br i1 %tobool19, label %if.end21, label %lbl_465
+
+if.end21:                                         ; preds = %lbl_465
+  %conv23 = zext i8 %p_44.addr.1 to i64
+  %xor = xor i64 %conv23, 1
+  %tobool.i = icmp eq i64 %conv23, 0
+  br i1 %tobool.i, label %cond.false.i, label %safe_mod_func_uint64_t_u_u.exit
+
+cond.false.i:                                     ; preds = %if.end21
+  %div.i = udiv i64 %xor, %conv23
+  br label %safe_mod_func_uint64_t_u_u.exit
+
+safe_mod_func_uint64_t_u_u.exit:                  ; preds = %cond.false.i, %if.end21
+  %cond.i = phi i64 [ %div.i, %cond.false.i ], [ %conv23, %if.end21 ]
+  %tobool28 = icmp eq i64 %cond.i, 0
+  br i1 %tobool28, label %lor.rhs, label %lor.end
+
+lor.rhs:                                          ; preds = %safe_mod_func_uint64_t_u_u.exit
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %safe_mod_func_uint64_t_u_u.exit
+  br label %for.cond2
+
+for.inc46:                                        ; preds = %for.cond2
+  br label %for.cond2
+
+if.end50:                                         ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %if.end50, %if.then9
+  ret void
+}
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index 7a80f80..1534585 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,7 +1,5 @@
 ; RUN: opt < %s -loop-simplify -lcssa -S | \
 ; RUN:   grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
-; RUN: opt < %s -loop-simplify -lcssa -S | \
-; RUN:   grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry}
 
         %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
 
diff --git a/test/Transforms/LCSSA/unused-phis.ll b/test/Transforms/LCSSA/unused-phis.ll
new file mode 100644
index 0000000..aa2ab96
--- /dev/null
+++ b/test/Transforms/LCSSA/unused-phis.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -lcssa -S | FileCheck %s
+; CHECK: exit1:
+; CHECK: .lcssa =
+; CHECK: exit2:
+; CHECK: .lcssa2 =
+; CHECK: exit3:
+; CHECK-NOT: .lcssa1 =
+
+; Test to ensure that when there are multiple exit blocks, PHI nodes are
+; only inserted by LCSSA when there is a use dominated by a given exit
+; block.
+
+declare void @printf(i32 %i)
+
+define i32 @unused_phis() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [0, %entry], [1, %then2]
+  br i1 undef, label %exit1, label %then1
+
+then1:
+  br i1 undef, label %exit2, label %then2
+
+then2:
+  br i1 undef, label %exit3, label %loop
+
+exit1:
+  call void @printf(i32 %i)
+  ret i32 %i
+
+exit2:
+  ret i32 %i
+
+exit3:
+  ret i32 0
+}
diff --git a/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll b/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
index 59f1dcb..e3d0d02 100644
--- a/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
+++ b/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -S | grep promoted
+; RUN: opt < %s -licm -S | FileCheck %s
 ; Promote value if at least one use is safe
 
 
@@ -15,6 +15,8 @@ cond.true:              ; preds = %loop.head
         store i32 40, i32* %p
         br label %loop.head
 
+; CHECK: exit:
+; CHECK: store i32 20, i32* %p
 exit:           ; preds = %loop.head
         ret i32 0
 }
diff --git a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
new file mode 100644
index 0000000..5774f58
--- /dev/null
+++ b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; PR9630
+
+@g_39 = external global i16, align 2
+
+declare i32* @func_84(i32** nocapture) nounwind readonly
+
+declare i32** @func_108(i32*** nocapture) nounwind readonly
+
+define void @func() nounwind {
+entry:
+  br label %for.body4.lr.ph
+
+for.body4.lr.ph:
+  br label %for.body4
+
+; CHECK: for.body4:
+; CHECK: volatile load i16* @g_39
+
+for.body4:
+  %l_612.11 = phi i32* [ undef, %for.body4.lr.ph ], [ %call19, %for.body4 ]
+  %tmp7 = volatile load i16* @g_39, align 2
+  %call = call i32** @func_108(i32*** undef)
+  %call19 = call i32* @func_84(i32** %call)
+  br i1 false, label %for.body4, label %for.cond.loopexit
+
+for.cond.loopexit:
+  br i1 false, label %for.body4.lr.ph, label %for.end26
+
+for.end26:
+  ret void
+}
diff --git a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
new file mode 100644
index 0000000..86c2679
--- /dev/null
+++ b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -tbaa -licm -S | FileCheck %s
+; PR9634
+
+@g_58 = common global i32 0, align 4
+@g_116 = common global i32* null, align 8
+
+define void @f() nounwind {
+
+; CHECK: entry:
+; CHECK: alloca [9 x i16]
+; CHECK: load i32* @g_58
+; CHECK: br label %for.body
+
+entry:
+  %l_87.i = alloca [9 x i16], align 16
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %inc12 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32* @g_58, i32** @g_116, align 8, !tbaa !0
+  %tmp2 = load i32** @g_116, align 8, !tbaa !0
+  %tmp3 = load i32* %tmp2, !tbaa !4
+  %or = or i32 %tmp3, 10
+  store i32 %or, i32* %tmp2, !tbaa !4
+  %inc = add nsw i32 %inc12, 1
+  %cmp = icmp slt i32 %inc, 4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LICM/2011-04-09-RAUW-AST.ll b/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
new file mode 100644
index 0000000..4285bd1
--- /dev/null
+++ b/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-rotate -licm -S | FileCheck %s
+; PR9604
+
+@g_3 = global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00"
+
+define i32 @main() nounwind {
+entry:
+  %tmp = load i32* @g_3, align 4
+  %tobool = icmp eq i32 %tmp, 0
+  br i1 %tobool, label %for.cond, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc10, %if.then, %entry
+  %g.0 = phi i32* [ %g.0, %for.inc10 ], [ @g_3, %entry ], [ null, %if.then ]
+  %x.0 = phi i32 [ %inc12, %for.inc10 ], [ 0, %entry ], [ 0, %if.then ]
+  %cmp = icmp slt i32 %x.0, 5
+  br i1 %cmp, label %for.cond4, label %for.end13
+
+for.cond4:                                        ; preds = %for.body7, %for.cond
+  %y.0 = phi i32 [ %inc, %for.body7 ], [ 0, %for.cond ]
+  %cmp6 = icmp slt i32 %y.0, 5
+  br i1 %cmp6, label %for.body7, label %for.inc10
+
+; CHECK: for.body7:
+; CHECK-NEXT: phi
+; CHECK-NEXT: store i32 0
+; CHECK-NEXT: store i32 1
+
+for.body7:                                        ; preds = %for.cond4
+  store i32 0, i32* @g_3, align 4
+  store i32 1, i32* %g.0, align 4
+  %inc = add nsw i32 %y.0, 1
+  br label %for.cond4
+
+for.inc10:                                        ; preds = %for.cond4
+  %inc12 = add nsw i32 %x.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  %tmp14 = load i32* @g_3, align 4
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp14) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
new file mode 100644
index 0000000..889d4e2
--- /dev/null
+++ b/test/Transforms/LICM/debug-value.ll
@@ -0,0 +1,62 @@
+; RUN: opt -licm -basicaa < %s -S | FileCheck %s
+
+define void @dgefa() nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond.backedge, %entry
+  br i1 undef, label %if.then, label %for.cond.backedge, !dbg !11
+
+for.cond.backedge:                                ; preds = %for.body61, %for.body61.us, %for.body
+  br i1 undef, label %for.end104, label %for.body, !dbg !15
+
+if.then:                                          ; preds = %for.body
+  br i1 undef, label %if.then27, label %if.end.if.end.split_crit_edge.critedge, !dbg !16
+
+if.then27:                                        ; preds = %if.then
+; CHECK: tail call void @llvm.dbg.value
+  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !19), !dbg !21
+  br label %for.body61.us
+
+if.end.if.end.split_crit_edge.critedge:           ; preds = %if.then
+  br label %for.body61
+
+for.body61.us:                                    ; preds = %for.body61.us, %if.then27
+  br i1 undef, label %for.cond.backedge, label %for.body61.us, !dbg !23
+
+for.body61:                                       ; preds = %for.body61, %if.end.if.end.split_crit_edge.critedge
+  br i1 undef, label %for.cond.backedge, label %for.body61, !dbg !23
+
+for.end104:                                       ; preds = %for.cond.backedge
+  ret void, !dbg !24
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6, !9, !10}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", metadata !1, i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 127169)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", metadata !1, i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", metadata !1, i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", metadata !1, i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 281, i32 9, metadata !12, null}
+!12 = metadata !{i32 589835, metadata !13, i32 272, i32 5, metadata !1, i32 32} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589835, metadata !14, i32 271, i32 5, metadata !1, i32 31} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 589835, metadata !10, i32 267, i32 1, metadata !1, i32 30} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 271, i32 5, metadata !14, null}
+!16 = metadata !{i32 284, i32 10, metadata !17, null}
+!17 = metadata !{i32 589835, metadata !12, i32 282, i32 9, metadata !1, i32 33} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{double undef}
+!19 = metadata !{i32 590080, metadata !14, metadata !"temp", metadata !1, i32 268, metadata !20, i32 0} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 589860, metadata !2, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 286, i32 14, metadata !22, null}
+!22 = metadata !{i32 589835, metadata !17, i32 285, i32 13, metadata !1, i32 34} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 296, i32 13, metadata !17, null}
+!24 = metadata !{i32 313, i32 1, metadata !14, null}
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
new file mode 100644
index 0000000..d31662d
--- /dev/null
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -0,0 +1,49 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+
+define void @foo(double* nocapture %a) nounwind ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{double* %a}, i64 0, metadata !5), !dbg !8
+  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !10), !dbg !14
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr double* %a, i64 %indvar
+; CHECK: call void @llvm.memset{{.+}} !dbg 
+  store double 0.000000e+00, double* %arrayidx, align 8, !dbg !15
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp ne i64 %indvar.next, 1000
+  br i1 %exitcond, label %for.body, label %for.end, !dbg !14
+
+for.end:                                          ; preds = %for.body
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !10), !dbg !16
+  ret void, !dbg !17
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (double*)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"li.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"li.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 589860, metadata !2, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 2, i32 18, metadata !0, null}
+!9 = metadata !{i32 0}
+!10 = metadata !{i32 590080, metadata !11, metadata !"i", metadata !1, i32 3, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 589835, metadata !12, i32 3, i32 3, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 589835, metadata !0, i32 2, i32 21, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 3, i32 3, metadata !12, null}
+!15 = metadata !{i32 4, i32 5, metadata !11, null}
+!16 = metadata !{i32 3, i32 29, metadata !11, null}
+!17 = metadata !{i32 5, i32 1, metadata !12, null}
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 9dc9862..16a6868 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -137,3 +137,19 @@ bb17:		; preds = %bb15
 }
 
 
+
+
+; PR9523 - Non-canonical loop.
+define void @test7(i8* %P) nounwind {
+entry:
+  indirectbr i8* %P, [label %"3", label %"5"]
+
+"3":                                              ; preds = %"4", %entry
+  br i1 undef, label %"5", label %"4"
+
+"4":                                              ; preds = %"3"
+  br label %"3"
+
+"5":                                              ; preds = %"3", %entry
+  ret void
+}
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 93a2247..e5e4717 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -7,7 +7,7 @@
 ; that indvars can promote the induction variable to i64
 ; without needing casts.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
 
 define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
index 4094e9c..986a55a 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
@@ -1,7 +1,8 @@
 ; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
+; RUN: opt < %s -loop-reduce -S | \
 ; RUN:   not grep {getelementptr.*%outer.*%INDVAR}
 
+target datalayout = "e-p:32:32:32-n:8:16:32"
 declare i1 @pred()
 
 declare i32 @foo()
diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
index e2aed78..1d43961 100644
--- a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
+++ b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
@@ -1,7 +1,8 @@
 ; Check that the index of 'P[outer]' is pulled out of the loop.
-; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
+; RUN: opt < %s -loop-reduce -S | \
 ; RUN:   not grep {getelementptr.*%outer.*%INDVAR}
 
+target datalayout = "e-p:32:32:32-n:32"
 declare i1 @pred()
 
 define void @test([10000 x i32]* %P, i32 %outer) {
diff --git a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
index 410d88f..00bd068 100644
--- a/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
+++ b/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
@@ -1,7 +1,9 @@
 ; Check that this test makes INDVAR and related stuff dead, because P[indvar]
 ; gets reduced, making INDVAR dead.
 
-; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | not grep INDVAR
+; RUN: opt < %s -loop-reduce -S | not grep INDVAR
+
+target datalayout = "e-p:32:32:32-n:32"
 
 declare i1 @pred()
 
diff --git a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
index 0a9fab0..7547d83 100644
--- a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
+++ b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
@@ -9,7 +9,7 @@
 
 ; mul uint %i, 3
 
-target datalayout = "e-p:32:32"
+target datalayout = "e-p:32:32-n:32"
 target triple = "i686-apple-darwin8"
 @flags2 = external global [8193 x i8], align 32		; <[8193 x i8]*> [#uses=1]
 
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
new file mode 100644
index 0000000..c66e150
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -0,0 +1,117 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+
+; PR2338
+define void @test1() nounwind ssp {
+  %retval = alloca i32, align 4
+  %i = alloca i8*, align 8
+  %call = call i8* @malloc(i64 1)
+  store i8* %call, i8** %i, align 8
+  %tmp = load i8** %i, align 8
+  store i8 1, i8* %tmp
+  %tmp1 = load i8** %i, align 8
+  call void @free(i8* %tmp1)
+  ret void
+
+; CHECK: @test1
+; CHECK-NEXT: ret void
+}
+
+
+; PR6627 - This whole nasty sequence should be flattened down to a single
+; 32-bit comparison.
+define void @test2(i8* %arrayidx) nounwind ssp {
+entry:
+  %xx = bitcast i8* %arrayidx to i32*
+  %x1 = load i32* %xx, align 4
+  %tmp = trunc i32 %x1 to i8
+  %conv = zext i8 %tmp to i32
+  %cmp = icmp eq i32 %conv, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK: @test2
+; CHECK: %x1 = load i32* %xx, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
+
+declare i32 @doo(...)
+
+; PR6627 - This should all be flattened down to one compare.  This is the same
+; as test2, except that the initial load is done as an i8 instead of i32, thus
+; requiring widening.
+define void @test2a(i8* %arrayidx) nounwind ssp {
+entry:
+  %x1 = load i8* %arrayidx, align 4
+  %conv = zext i8 %x1 to i32
+  %cmp = icmp eq i32 %conv, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK: @test2a
+; CHECK: %x1 = load i32* {{.*}}, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
+
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index 6f21b66..7a81942 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -42,3 +42,28 @@ define i32 @test3(i32 %Arg, i32 %x1, i32 %x2, i32 %x3) {
  %E = add i32 %D, %C
   ret i32 %E
 }
+
+
+; rdar://9096268
+define void @x66303361ae3f602889d1b7d0f86e5455(i8* %arg) nounwind {
+_:
+  br label %_33
+
+_33:                                              ; preds = %_33, %_
+  %tmp348 = load i8* %arg, align 1
+  %tmp349 = lshr i8 %tmp348, 7
+  %tmp350 = or i8 %tmp349, 42
+  %tmp351 = add i8 %tmp350, -42
+  %tmp352 = zext i8 %tmp351 to i32
+  %tmp358 = add i32 %tmp352, -501049439
+  %tmp359 = mul i32 %tmp358, %tmp358
+  %tmp360 = mul i32 %tmp352, %tmp352
+  %tmp361 = sub i32 %tmp359, %tmp360
+  %tmp362 = mul i32 %tmp361, -920056735
+  %tmp363 = add i32 %tmp362, 501049439
+  %tmp364 = add i32 %tmp362, -2000262972
+  %tmp365 = sub i32 %tmp363, %tmp364
+  %tmp366 = sub i32 -501049439, %tmp362
+  %tmp367 = add i32 %tmp365, %tmp366
+  br label %_33
+}
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
new file mode 100644
index 0000000..a52000a
--- /dev/null
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+; rdar://9167457
+
+; Reassociate shouldn't break this testcase involving a secondary
+; reassociation.
+
+; CHECK:     define
+; CHECK-NOT: undef
+; CHECK:     %factor = mul i32 %tmp3, -2
+; CHECK-NOT: undef
+; CHECK:     }
+
+define void @x0f2f640ab6718391b59ce96d9fdeda54(i32 %arg, i32 %arg1, i32 %arg2, i32* %.out) nounwind {
+_:
+  %tmp = sub i32 %arg, %arg1
+  %tmp3 = mul i32 %tmp, -1268345047
+  %tmp4 = add i32 %tmp3, 2014710503
+  %tmp5 = add i32 %tmp3, -1048397418
+  %tmp6 = sub i32 %tmp4, %tmp5
+  %tmp7 = sub i32 -2014710503, %tmp3
+  %tmp8 = add i32 %tmp6, %tmp7
+  store i32 %tmp8, i32* %.out
+  ret void
+}
diff --git a/test/Transforms/SCCP/apint-basictest.ll b/test/Transforms/SCCP/apint-basictest.ll
index c03bfef..f6ef1ab 100644
--- a/test/Transforms/SCCP/apint-basictest.ll
+++ b/test/Transforms/SCCP/apint-basictest.ll
@@ -1,4 +1,4 @@
-; This is a basic sanity check for constant propogation.  The add instruction 
+; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 
 ; RUN: opt < %s -sccp -S | not grep add
diff --git a/test/Transforms/SCCP/apint-basictest2.ll b/test/Transforms/SCCP/apint-basictest2.ll
index 1734827..ad8b4a4 100644
--- a/test/Transforms/SCCP/apint-basictest2.ll
+++ b/test/Transforms/SCCP/apint-basictest2.ll
@@ -1,4 +1,4 @@
-; This is a basic sanity check for constant propogation.  The add instruction 
+; This is a basic sanity check for constant propagation.  The add instruction
 ; and phi instruction should be eliminated.
 
 ; RUN: opt < %s -sccp -S | not grep phi
diff --git a/test/Transforms/SCCP/apint-basictest3.ll b/test/Transforms/SCCP/apint-basictest3.ll
index 47671bf..b8fcca6 100644
--- a/test/Transforms/SCCP/apint-basictest3.ll
+++ b/test/Transforms/SCCP/apint-basictest3.ll
@@ -1,4 +1,4 @@
-; This is a basic sanity check for constant propogation.  It tests the basic 
+; This is a basic sanity check for constant propagation.  It tests the basic
 ; arithmatic operations.
 
 
diff --git a/test/Transforms/SCCP/apint-basictest4.ll b/test/Transforms/SCCP/apint-basictest4.ll
index 41036ea..8624260 100644
--- a/test/Transforms/SCCP/apint-basictest4.ll
+++ b/test/Transforms/SCCP/apint-basictest4.ll
@@ -1,4 +1,4 @@
-; This is a basic sanity check for constant propogation.  It tests the basic 
+; This is a basic sanity check for constant propagation.  It tests the basic
 ; logic operations.
 
 
diff --git a/test/Transforms/SRETPromotion/2008-03-11-attributes.ll b/test/Transforms/SRETPromotion/2008-03-11-attributes.ll
deleted file mode 100644
index 55abec5..0000000
--- a/test/Transforms/SRETPromotion/2008-03-11-attributes.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: opt < %s -sretpromotion -disable-output
-	%struct.ObjPoint = type { double, double, double, double, double, double }
-
-define void @RotatePoint(%struct.ObjPoint* sret  %agg.result, %struct.ObjPoint* byval  %a, double %rx, double %ry, double %rz) nounwind  {
-entry:
-	unreachable
-}
diff --git a/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll b/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll
deleted file mode 100644
index 1168b0b..0000000
--- a/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; This test lures sretpromotion into promoting the sret argument of foo, even
-; when the function is used as an argument to bar. It used to not check for
-; this, assuming that all users of foo were direct calls, resulting in an
-; assertion failure later on.
-
-; We're mainly testing for opt not to crash, but we'll check to see if the sret
-; attribute is still there for good measure.
-; RUN: opt < %s -sretpromotion -S | grep sret
-
-%struct.S = type <{ i32, i32 }>
-
-define i32 @main() {
-entry:
-	%tmp = alloca %struct.S		; <%struct.S*> [#uses=1]
-	call void @bar( %struct.S* sret  %tmp, void (%struct.S*, ...)* @foo )
-	ret i32 undef
-}
-
-declare void @bar(%struct.S* sret , void (%struct.S*, ...)*)
-
-define internal void @foo(%struct.S* sret  %agg.result, ...) {
-entry:
-	ret void
-}
diff --git a/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll b/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll
deleted file mode 100644
index 26c6a6e..0000000
--- a/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; This test shows an sret function that is used as an operand to a bitcast.
-; StructRetPromotion used to assume that a function was only used by call or
-; invoke instructions, making this code cause an assertion failure.
-
-; We're mainly testing for opt not to crash, but we'll check to see if the sret
-; attribute is still there for good measure.
-; RUN: opt < %s -sretpromotion -S | grep sret
-
-%struct.S = type <{ i32, i32 }>
-
-define i32 @main() {
-entry:
-        %bar = bitcast void (%struct.S*)* @foo to i32 ()*
-	ret i32 undef
-}
-
-define internal void @foo(%struct.S* sret) {
-entry:
-	ret void
-}
diff --git a/test/Transforms/SRETPromotion/basictest.ll b/test/Transforms/SRETPromotion/basictest.ll
deleted file mode 100644
index ff047dc..0000000
--- a/test/Transforms/SRETPromotion/basictest.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -sretpromotion -S > %t
-; RUN: cat %t | grep sret | count 1
-
-; This function is promotable
-define internal void @promotable({i32, i32}* sret %s) {
-  %A = getelementptr {i32, i32}* %s, i32 0, i32 0
-  store i32 0, i32* %A
-  %B = getelementptr {i32, i32}* %s, i32 0, i32 0
-  store i32 1, i32* %B
-  ret void
-}
-
-; This function is not promotable (due to it's use below)
-define internal void @notpromotable({i32, i32}* sret %s) {
-  %A = getelementptr {i32, i32}* %s, i32 0, i32 0
-  store i32 0, i32* %A
-  %B = getelementptr {i32, i32}* %s, i32 0, i32 0
-  store i32 1, i32* %B
-  ret void
-}
-
-define void @caller({i32, i32}* %t) {
-  %s = alloca {i32, i32}
-  call void @promotable({i32, i32}* %s)
-  %A = getelementptr {i32, i32}* %s, i32 0, i32 0
-  %a = load i32* %A
-  %B = getelementptr {i32, i32}* %s, i32 0, i32 0
-  %b = load i32* %B
-  ; This passes in something that's not an alloca, which makes the argument not
-  ; promotable
-  call void @notpromotable({i32, i32}* %t)
-  ret void
-}
diff --git a/test/Transforms/SRETPromotion/dg.exp b/test/Transforms/SRETPromotion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SRETPromotion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
index d799bd7..8bc4ff0 100644
--- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i8 17}
 ; rdar://5707076
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin9.1.0"
 	%struct.T = type <{ i8, [3 x i8] }>
 
diff --git a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
index 87a08b7..ce70a1b 100644
--- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
@@ -13,7 +13,7 @@ define i32 @foo() {
 	%res2 = insertvalue { i32, i32 } %res1, i32 2, 1		; <{ i32, i32 }> [#uses=1]
         ; And store it
 	store { i32, i32 } %res2, { i32, i32 }* %target
-        ; Actually use %target, so it doesn't get removed alltogether
+        ; Actually use %target, so it doesn't get removed altogether
         %ptr = getelementptr { i32, i32 }* %target, i32 0, i32 0
         %val = load i32* %ptr
 	ret i32 %val
@@ -26,7 +26,7 @@ define i32 @bar() {
 	%res2 = insertvalue [ 2 x i32 ] %res1, i32 2, 1		; <{ i32, i32 }> [#uses=1]
         ; And store it
 	store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target
-        ; Actually use %target, so it doesn't get removed alltogether
+        ; Actually use %target, so it doesn't get removed altogether
         %ptr = getelementptr [ 2 x i32 ]* %target, i32 0, i32 0
         %val = load i32* %ptr
 	ret i32 %val
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
index f200589..39954d8 100644
--- a/test/Transforms/ScalarRepl/dg.exp
+++ b/test/Transforms/ScalarRepl/dg.exp
@@ -1,3 +1,3 @@
 load_lib llvm.exp
 
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
new file mode 100644
index 0000000..2f51cc7
--- /dev/null
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%struct.Vector4 = type { float, float, float, float }
+@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
+
+; CHECK: define void @f
+; CHECK-NOT: alloca
+; CHECK: phi <4 x float>
+
+define void @f() nounwind ssp {
+entry:
+  %i = alloca i32, align 4
+  %vector = alloca %struct.Vector4, align 16
+  %agg.tmp = alloca %struct.Vector4, align 16
+  %tmp = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %storemerge, i32* %i, align 4
+  %cmp = icmp slt i32 %storemerge, 1000000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+  %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
+  %1 = load [2 x i64]* %0, align 16
+  %tmp2.i = extractvalue [2 x i64] %1, 0
+  %tmp3.i = zext i64 %tmp2.i to i128
+  %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
+  %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
+  %2 = bitcast %struct.Vector4* %vector to <4 x float>*
+  store <4 x float> %sub.i.i, <4 x float>* %2, align 16
+  %tmp4 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp4, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %x = getelementptr inbounds %struct.Vector4* %vector, i32 0, i32 0
+  %tmp5 = load float* %x, align 16
+  %conv = fpext float %tmp5 to double
+  %call = call i32 (...)* @printf(double %conv) nounwind
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare i32 @printf(...)
diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
new file mode 100644
index 0000000..cfb88bd
--- /dev/null
+++ b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.S = type { [12 x i32] }
+
+; CHECK: @bar4
+define void @bar4(%struct.S* byval %s) nounwind ssp {
+entry:
+; CHECK: alloca
+; CHECK-NOT: load
+; CHECK: memcpy
+  %t = alloca %struct.S, align 4
+  %agg.tmp = alloca %struct.S, align 4
+  %tmp = bitcast %struct.S* %t to i8*
+  %tmp1 = bitcast %struct.S* %s to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
+  %tmp2 = bitcast %struct.S* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.S* %t to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
+  %call = call i32 (...)* @bazz(%struct.S* byval %agg.tmp)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @bazz(...)
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
index fe702fa..ea4ec14 100644
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@@ -3,7 +3,7 @@
 ; RUN:   not grep alloca
 ; RUN: opt < %s -scalarrepl -S | grep {ret i8}
 
-target datalayout = "e-p:32:32"
+target datalayout = "e-p:32:32-n8:16:32"
 target triple = "i686-apple-darwin8.7.2"
 	%struct.Val = type { i32*, i32 }
 
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index 37cb49f..c51ef10 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -94,7 +94,172 @@ define i64 @test6(<2 x float> %X) {
 	%tmp = load i64* %P
 	ret i64 %tmp
 ; CHECK: @test6
-; CHECK: bitcast <2 x float> %X to <1 x i64>
+; CHECK: bitcast <2 x float> %X to i64
 ; CHECK: ret i64
 }
 
+define float @test7(<4 x float> %x) {
+	%a = alloca <4 x float>
+	store <4 x float> %x, <4 x float>* %a
+	%p = bitcast <4 x float>* %a to <2 x float>*
+	%b = load <2 x float>* %p
+	%q = getelementptr <4 x float>* %a, i32 0, i32 2
+	%c = load float* %q
+	ret float %c
+; CHECK: @test7
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %x to <2 x double>
+; CHECK-NEXT: extractelement <2 x double>
+; CHECK-NEXT: bitcast double %tmp4 to <2 x float>
+; CHECK-NEXT: extractelement <4 x float>
+}
+
+define void @test8(<4 x float> %x, <2 x float> %y) {
+	%a = alloca <4 x float>
+	store <4 x float> %x, <4 x float>* %a
+	%p = bitcast <4 x float>* %a to <2 x float>*
+	store <2 x float> %y, <2 x float>* %p
+	ret void
+; CHECK: @test8
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %x to <2 x double>
+; CHECK-NEXT: bitcast <2 x float> %y to double
+; CHECK-NEXT: insertelement <2 x double>
+; CHECK-NEXT: bitcast <2 x double> %tmp2 to <4 x float>
+}
+
+define i256 @test9(<4 x i256> %x) {
+	%a = alloca <4 x i256>
+	store <4 x i256> %x, <4 x i256>* %a
+	%p = bitcast <4 x i256>* %a to <2 x i256>*
+	%b = load <2 x i256>* %p
+	%q = getelementptr <4 x i256>* %a, i32 0, i32 2
+	%c = load i256* %q
+	ret i256 %c
+; CHECK: @test9
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x i256> %x to <2 x i512>
+; CHECK-NEXT: extractelement <2 x i512>
+; CHECK-NEXT: bitcast i512 %tmp4 to <2 x i256>
+; CHECK-NEXT: extractelement <4 x i256>
+}
+
+define void @test10(<4 x i256> %x, <2 x i256> %y) {
+	%a = alloca <4 x i256>
+	store <4 x i256> %x, <4 x i256>* %a
+	%p = bitcast <4 x i256>* %a to <2 x i256>*
+	store <2 x i256> %y, <2 x i256>* %p
+	ret void
+; CHECK: @test10
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x i256> %x to <2 x i512>
+; CHECK-NEXT: bitcast <2 x i256> %y to i512
+; CHECK-NEXT: insertelement <2 x i512>
+; CHECK-NEXT: bitcast <2 x i512> %tmp2 to <4 x i256>
+}
+
+%union.v = type { <2 x i64> }
+
+define void @test11(<2 x i64> %x) {
+  %a = alloca %union.v
+  %p = getelementptr inbounds %union.v* %a, i32 0, i32 0
+  store <2 x i64> %x, <2 x i64>* %p, align 16
+  %q = getelementptr inbounds %union.v* %a, i32 0, i32 0
+  %r = bitcast <2 x i64>* %q to <4 x float>*
+  %b = load <4 x float>* %r, align 16
+  ret void
+; CHECK: @test11
+; CHECK-NOT: alloca
+}
+
+define void @test12() {
+entry:
+  %a = alloca <64 x i8>, align 64
+  store <64 x i8> undef, <64 x i8>* %a, align 64
+  %p = bitcast <64 x i8>* %a to <16 x i8>*
+  %0 = load <16 x i8>* %p, align 64
+  store <16 x i8> undef, <16 x i8>* %p, align 64
+  %q = bitcast <16 x i8>* %p to <64 x i8>*
+  %1 = load <64 x i8>* %q, align 64
+  ret void
+; CHECK: @test12
+; CHECK-NOT: alloca
+; CHECK: extractelement <4 x i128>
+; CHECK: insertelement <4 x i128>
+}
+
+define float @test13(<4 x float> %x, <2 x i32> %y) {
+	%a = alloca <4 x float>
+	store <4 x float> %x, <4 x float>* %a
+	%p = bitcast <4 x float>* %a to <2 x float>*
+	%b = load <2 x float>* %p
+	%q = getelementptr <4 x float>* %a, i32 0, i32 2
+	%c = load float* %q
+	%r = bitcast <4 x float>* %a to <2 x i32>*
+	store <2 x i32> %y, <2 x i32>* %r
+	ret float %c
+; CHECK: @test13
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %x to i128
+}
+
+define <3 x float> @test14(<3 x float> %x)  {
+entry:
+  %x.addr = alloca <3 x float>, align 16
+  %r = alloca <3 x i32>, align 16
+  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp = bitcast <3 x float>* %x.addr to <4 x float>*
+  store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
+  %tmp = load <3 x float>* %x.addr, align 16
+  %cmp = fcmp une <3 x float> %tmp, zeroinitializer
+  %sext = sext <3 x i1> %cmp to <3 x i32>
+  %and = and <3 x i32> <i32 1065353216, i32 1065353216, i32 1065353216>, %sext
+  %extractVec1 = shufflevector <3 x i32> %and, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp2 = bitcast <3 x i32>* %r to <4 x i32>*
+  store <4 x i32> %extractVec1, <4 x i32>* %storetmp2, align 16
+  %tmp3 = load <3 x i32>* %r, align 16
+  %0 = bitcast <3 x i32> %tmp3 to <3 x float>
+  %tmp4 = load <3 x float>* %x.addr, align 16
+  ret <3 x float> %tmp4
+; CHECK: @test14
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i32> %extractVec1, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+
+define void @test15(<3 x i64>* sret %agg.result, <3 x i64> %x, <3 x i64> %min) {
+entry:
+  %x.addr = alloca <3 x i64>, align 32
+  %min.addr = alloca <3 x i64>, align 32
+  %extractVec = shufflevector <3 x i64> %x, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp = bitcast <3 x i64>* %x.addr to <4 x i64>*
+  store <4 x i64> %extractVec, <4 x i64>* %storetmp, align 32
+  %extractVec1 = shufflevector <3 x i64> %min, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %storetmp2 = bitcast <3 x i64>* %min.addr to <4 x i64>*
+  store <4 x i64> %extractVec1, <4 x i64>* %storetmp2, align 32
+  %tmp = load <3 x i64>* %x.addr
+  %tmp5 = extractelement <3 x i64> %tmp, i32 0
+  %tmp11 = insertelement <3 x i64> %tmp, i64 %tmp5, i32 0
+  store <3 x i64> %tmp11, <3 x i64>* %x.addr
+  %tmp30 = load <3 x i64>* %x.addr, align 32
+  store <3 x i64> %tmp30, <3 x i64>* %agg.result
+  ret void
+; CHECK: @test15
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+
+define <4 x float> @test16(<4 x float> %x, i64 %y0, i64 %y1) {
+entry:
+  %tmp8 = bitcast <4 x float> undef to <2 x double>
+  %tmp9 = bitcast i64 %y0 to double
+  %tmp10 = insertelement <2 x double> %tmp8, double %tmp9, i32 0
+  %tmp11 = bitcast <2 x double> %tmp10 to <4 x float>
+  %tmp3 = bitcast <4 x float> %tmp11 to <2 x double>
+  %tmp4 = bitcast i64 %y1 to double
+  %tmp5 = insertelement <2 x double> %tmp3, double %tmp4, i32 1
+  %tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
+	ret <4 x float> %tmp6
+; CHECK: @test16
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %tmp11 to <2 x double>
+}
diff --git a/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll b/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
index 2c84c93..70fbddf 100644
--- a/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
+++ b/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
@@ -1,7 +1,5 @@
-; RUN: opt < %s -gvn -simplifycfg \
-; RUN:   -disable-output
+; RUN: opt < %s -gvn -simplifycfg -disable-output
 ; PR867
-; END.
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
index 59e886b..56f43b6 100644
--- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -simplifycfg -S > %t
 ; RUN: not grep {^BB.tomerge} %t
-; RUN  grep {^BB.nomerge} %t | count 2
+; RUN: grep {^BB.nomerge} %t | count 2
 
 ; ModuleID = '<stdin>' 
 declare i1 @foo()
diff --git a/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll b/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll
new file mode 100644
index 0000000..329774e
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; PR9420
+
+; Note that the crash in PR9420 test is sensitive to the ordering of
+; the transformations done by SimplifyCFG, so this test is likely to rot
+; quickly.
+
+define noalias i8* @func_29() nounwind {
+; CHECK: entry:
+; CHECK-NEXT: unreachable
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc38, %entry
+  %p_34.addr.0 = phi i16 [ 1, %entry ], [ %conv40, %for.inc38 ]
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc29, %for.cond
+  %p_32.addr.0 = phi i1 [ true, %for.cond ], [ true, %for.inc29 ]
+  br i1 %p_32.addr.0, label %for.body8, label %for.inc38
+
+for.body8:                                        ; preds = %for.cond1
+  unreachable
+
+for.inc29:                                        ; preds = %for.cond17
+  br label %for.cond1
+
+for.inc38:                                        ; preds = %for.end32
+  %conv40 = add i16 %p_34.addr.0, 1
+  br label %for.cond
+}
diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
index c28d0ba..36b52f5 100644
--- a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
@@ -6,6 +6,7 @@
 define i32 @test(i1 %a, i1 %b) {
 ; CHECK: br i1 %a
         br i1 %a, label %M, label %O
+; CHECK: O:
 O:              ; preds = %0
 ; CHECK: select i1 %b, i32 0, i32 1
 ; CHECK-NOT: phi
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate2.ll b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
index c0f6781..0b3893d 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate2.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
@@ -1,14 +1,17 @@
 ; RUN: opt < %s -simplifycfg -S | not grep br
 
-define i32 @test(i1 %C, i32 %V1, i32 %V2) {
+define i32 @test(i1 %C, i32 %V1, i32 %V2, i16 %V3) {
 entry:
-        br i1 %C, label %then, label %Cont
+        br i1 %C, label %then, label %else
 then:           ; preds = %entry
-        %V3 = or i32 %V2, %V1           ; <i32> [#uses=1]
+        %V4 = or i32 %V2, %V1           ; <i32> [#uses=1]
         br label %Cont
-Cont:           ; preds = %then, %entry
-        %V4 = phi i32 [ %V1, %entry ], [ %V3, %then ]           ; <i32> [#uses=0]
-        call i32 @test( i1 false, i32 0, i32 0 )                ; <i32>:0 [#uses=0]
+else:           ; preds = %entry
+        %V5 = sext i16 %V3 to i32       ; <i32> [#uses=1]
+        br label %Cont
+Cont:           ; preds = %then, %else
+        %V6 = phi i32 [ %V5, %else ], [ %V4, %then ]            ; <i32> [#uses=0]
+        call i32 @test( i1 false, i32 0, i32 0, i16 0 )         ; <i32>:0 [#uses=0]
         ret i32 %V1
 }
 
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate3.ll b/test/Transforms/SimplifyCFG/PhiEliminate3.ll
new file mode 100644
index 0000000..3566b87
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PhiEliminate3.ll
@@ -0,0 +1,34 @@
+; Test merging of blocks containing complex expressions,
+; with various folding thresholds
+;
+; RUN: opt < %s -simplifycfg -S -phi-node-folding-threshold=1 | grep N:
+; RUN: opt < %s -simplifycfg -S -phi-node-folding-threshold=2 | not grep N:
+; RUN: opt < %s -simplifycfg -S -phi-node-folding-threshold=2 | grep M:
+; RUN: opt < %s -simplifycfg -S -phi-node-folding-threshold=7 | not grep M:
+;
+
+define i32 @test(i1 %a, i1 %b, i32 %i, i32 %j, i32 %k) {
+entry:
+        br i1 %a, label %M, label %O
+O:
+        br i1 %b, label %P, label %Q
+P:
+        %iaj = add i32 %i, %j
+        %iajak = add i32 %iaj, %k
+        br label %N
+Q:
+        %ixj = xor i32 %i, %j
+        %ixjxk = xor i32 %ixj, %k
+        br label %N
+N:
+        ; This phi should be foldable if threshold >= 2
+        %Wp = phi i32 [ %iajak, %P ], [ %ixjxk, %Q ]
+        %Wp2 = add i32 %Wp, %Wp
+        br label %M
+M:
+        ; This phi should be foldable if threshold >= 7
+        %W = phi i32 [ %Wp2, %N ], [ 2, %entry ]
+        %R = add i32 %W, 1
+        ret i32 %R
+}
+
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 7133d98..4a692f3 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -1,33 +1,73 @@
-; RUN: opt < %s -simplifycfg -S | not grep unreachable
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define void @test1(i1 %C, i1* %BP) {
+; CHECK: @test1
+; CHECK: entry:
+; CHECK-NEXT: ret void
+entry:
         br i1 %C, label %T, label %F
-T:              ; preds = %0
+T:
         store i1 %C, i1* %BP
         unreachable
-F:              ; preds = %0
+F:
         ret void
 }
 
 define void @test2() {
+; CHECK: @test2
+; CHECK: entry:
+; CHECK-NEXT: call void @test2()
+; CHECK-NEXT: ret void
+entry:
         invoke void @test2( )
                         to label %N unwind label %U
-U:              ; preds = %0
+U:
         unreachable
-N:              ; preds = %0
+N:
         ret void
 }
 
 define i32 @test3(i32 %v) {
+; CHECK: @test3
+; CHECK: entry:
+; CHECK-NEXT: [[CMP:%[A-Za-z0-9]+]] = icmp eq i32 %v, 2
+; CHECK-NEXT: select i1 [[CMP]], i32 2, i32 1
+; CHECK-NEXT: ret
+entry:
         switch i32 %v, label %default [
                  i32 1, label %U
                  i32 2, label %T
         ]
-default:                ; preds = %0
+default:
         ret i32 1
-U:              ; preds = %0
+U:
         unreachable
-T:              ; preds = %0
+T:
         ret i32 2
 }
 
+; PR9450
+define i32 @test4(i32 %v) {
+; CHECK: entry:
+; CHECK-NEXT:  switch i32 %v, label %T [
+; CHECK-NEXT:    i32 3, label %V
+; CHECK-NEXT:    i32 2, label %U
+; CHECK-NEXT:  ]
+
+entry:
+        br label %SWITCH
+V:
+        ret i32 7
+SWITCH:
+        switch i32 %v, label %default [
+                 i32 1, label %T
+                 i32 2, label %U
+                 i32 3, label %V
+        ]
+default:
+        unreachable
+U:
+        ret i32 1
+T:
+        ret i32 2
+}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
new file mode 100644
index 0000000..0897c95
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -0,0 +1,58 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+
+%0 = type { i32*, i32* }
+
+@0 = external hidden constant [5 x %0], align 4
+
+define void @foo(i32) nounwind ssp {
+Entry:
+  %1 = icmp slt i32 %0, 0, !dbg !5
+  br i1 %1, label %BB5, label %BB1, !dbg !5
+
+BB1:                                              ; preds = %Entry
+  %2 = icmp sgt i32 %0, 4, !dbg !5
+  br i1 %2, label %BB5, label %BB2, !dbg !5
+
+BB2:                                              ; preds = %BB1
+  %3 = shl i32 1, %0, !dbg !5
+  %4 = and i32 %3, 31, !dbg !5
+  %5 = icmp eq i32 %4, 0, !dbg !5
+  br i1 %5, label %BB5, label %BB3, !dbg !5
+
+;CHECK: icmp eq
+;CHECK-NEXT: getelementptr
+;CHECK-NEXT: icmp eq
+
+BB3:                                              ; preds = %BB2
+  %6 = getelementptr inbounds [5 x %0]* @0, i32 0, i32 %0, !dbg !6
+  call void @llvm.dbg.value(metadata !{%0* %6}, i64 0, metadata !7), !dbg !12
+  %7 = icmp eq %0* %6, null, !dbg !13
+  br i1 %7, label %BB5, label %BB4, !dbg !13
+
+BB4:                                              ; preds = %BB3
+  %8 = icmp slt i32 %0, 0, !dbg !5
+  ret void, !dbg !14
+
+BB5:                                              ; preds = %BB3, %BB2, %BB1, %Entry
+  ret void, !dbg !14
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i32)* @foo, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.i", metadata !"/private/tmp", metadata !"clang (trunk 129006)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 131, i32 2, metadata !0, null}
+!6 = metadata !{i32 134, i32 2, metadata !0, null}
+!7 = metadata !{i32 590080, metadata !8, metadata !"bar", metadata !1, i32 232, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 231, i32 1, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 232, i32 40, metadata !8, null}
+!13 = metadata !{i32 234, i32 2, metadata !8, null}
+!14 = metadata !{i32 274, i32 1, metadata !8, null}
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
new file mode 100644
index 0000000..03053f0
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -0,0 +1,53 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define i32 @foo(i32 %i) nounwind ssp {
+  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6), !dbg !7
+  call void @llvm.dbg.value(metadata !8, i64 0, metadata !9), !dbg !11
+  %1 = icmp ne i32 %i, 0, !dbg !12
+;CHECK: call i32 (...)* @bar()
+;CHECK-NEXT: llvm.dbg.value
+  br i1 %1, label %2, label %4, !dbg !12
+
+; <label>:2                                       ; preds = %0
+  %3 = call i32 (...)* @bar(), !dbg !13
+  call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !9), !dbg !13
+  br label %6, !dbg !15
+
+; <label>:4                                       ; preds = %0
+  %5 = call i32 (...)* @bar(), !dbg !16
+  call void @llvm.dbg.value(metadata !{i32 %5}, i64 0, metadata !9), !dbg !16
+  br label %6, !dbg !18
+
+; <label>:6                                       ; preds = %4, %2
+  %k.0 = phi i32 [ %3, %2 ], [ %5, %4 ]
+  ret i32 %k.0, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @bar(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"b.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"b.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 2, i32 13, metadata !0, null}
+!8 = metadata !{i32 0}
+!9 = metadata !{i32 590080, metadata !10, metadata !"k", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 589835, metadata !0, i32 2, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 3, i32 12, metadata !10, null}
+!12 = metadata !{i32 4, i32 3, metadata !10, null}
+!13 = metadata !{i32 5, i32 5, metadata !14, null}
+!14 = metadata !{i32 589835, metadata !10, i32 4, i32 10, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 6, i32 3, metadata !14, null}
+!16 = metadata !{i32 7, i32 5, metadata !17, null}
+!17 = metadata !{i32 589835, metadata !10, i32 6, i32 10, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 8, i32 3, metadata !17, null}
+!19 = metadata !{i32 9, i32 3, metadata !10, null}
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
new file mode 100644
index 0000000..5494a65
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -0,0 +1,138 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; Test basic folding to a conditional branch.
+define i32 @foo(i64 %x, i64 %y) nounwind {
+; CHECK: @foo
+entry:
+    %eq = icmp eq i64 %x, %y
+    br i1 %eq, label %b, label %switch
+switch:
+    %lt = icmp slt i64 %x, %y
+; CHECK: br i1 %lt, label %a, label %b
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %b
+        i32 2, label %b
+    ]
+a:
+    tail call void @bees.a() nounwind
+    ret i32 1
+; CHECK: b:
+; CHECK-NEXT: %retval = phi i32 [ 0, %switch ], [ 2, %entry ]
+b:
+    %retval = phi i32 [0, %switch], [0, %switch], [2, %entry]
+    tail call void @bees.b() nounwind
+    ret i32 %retval
+; CHECK-NOT: bees:
+bees:
+    tail call void @llvm.trap() nounwind
+    unreachable
+}
+
+; Test basic folding to an unconditional branch.
+define i32 @bar(i64 %x, i64 %y) nounwind {
+; CHECK: @bar
+entry:
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: ret i32 0
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %b
+        i32 2, label %a
+    ]
+a:
+    %retval = phi i32 [0, %entry], [0, %entry], [1, %b]
+    tail call void @bees.a() nounwind
+    ret i32 0
+b:
+    tail call void @bees.b() nounwind
+    br label %a
+bees:
+    tail call void @llvm.trap() nounwind
+    unreachable
+}
+
+; Test the edge case where both values from the select are the default case.
+define void @bazz(i64 %x, i64 %y) nounwind {
+; CHECK: @bazz
+entry:
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.b() nounwind
+; CHECK-NEXT: ret void
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 10, i32 12
+    switch i32 %qux, label %b [
+        i32 0, label %a
+        i32 1, label %bees
+        i32 2, label %bees
+    ]
+a:
+    tail call void @bees.a() nounwind
+    ret void
+b:
+    tail call void @bees.b() nounwind
+    ret void
+bees:
+    tail call void @llvm.trap()
+    unreachable
+}
+
+; Test the edge case where both values from the select are equal.
+define void @quux(i64 %x, i64 %y) nounwind {
+; CHECK: @quux
+entry:
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: ret void
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 0, i32 0
+    switch i32 %qux, label %b [
+        i32 0, label %a
+        i32 1, label %bees
+        i32 2, label %bees
+    ]
+a:
+    tail call void @bees.a() nounwind
+    ret void
+b:
+    tail call void @bees.b() nounwind
+    ret void
+bees:
+    tail call void @llvm.trap()
+    unreachable
+}
+
+; A final test, for phi node munging.
+define i32 @xyzzy(i64 %x, i64 %y) {
+; CHECK: @xyzzy
+entry:
+    %eq = icmp eq i64 %x, %y
+    br i1 %eq, label %r, label %cont
+cont:
+; CHECK: %lt = icmp slt i64 %x, %y
+    %lt = icmp slt i64 %x, %y
+; CHECK-NEXT: br i1 %lt, label %a, label %r
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %r
+        i32 2, label %r
+    ]
+r:
+    %val = phi i32 [0, %entry], [1, %cont], [1, %cont]
+    ret i32 %val
+a:
+    ret i32 -1
+; CHECK-NOT: bees:
+bees:
+    tail call void @llvm.trap()
+    unreachable
+}
+
+declare void @llvm.trap() nounwind noreturn
+declare void @bees.a() nounwind
+declare void @bees.b() nounwind
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
new file mode 100644
index 0000000..24540e5
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+; Radar 9342286
+; Assign DebugLoc to trap instruction.
+define void @foo() nounwind ssp {
+; CHECK: call void @llvm.trap(), !dbg
+  store i32 42, i32* null, !dbg !5
+  ret void, !dbg !7
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"foo.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 4, i32 2, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 5, i32 1, metadata !6, null}
diff --git a/test/Transforms/SimplifyLibCalls/debug-line.ll b/test/Transforms/SimplifyLibCalls/debug-line.ll
new file mode 100644
index 0000000..b668e4b
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/debug-line.ll
@@ -0,0 +1,24 @@
+; RUN: opt -simplify-libcalls -S < %s | FileCheck %s
+
+
+@.str = private constant [3 x i8] c"%c\00"
+
+define void @foo() nounwind ssp {
+;CHECK: call i32 @putchar{{.+}} !dbg
+  %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
+  ret void, !dbg !7
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"m.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"m.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 5, i32 2, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 4, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 6, i32 1, metadata !6, null}
+
diff --git a/test/Transforms/SimplifyLibCalls/half-powr.ll b/test/Transforms/SimplifyLibCalls/half-powr.ll
deleted file mode 100644
index 5d317fe..0000000
--- a/test/Transforms/SimplifyLibCalls/half-powr.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt -simplify-libcalls-halfpowr %s -S | FileCheck %s
-
-define float @__half_powrf4(float %f, float %g) nounwind readnone {
-entry:
-	%0 = fcmp olt float %f, 2.000000e+00		; <i1> [#uses=1]
-	br i1 %0, label %bb, label %bb1
-
-bb:		; preds = %entry
-	%1 = fdiv float %f, 3.000000e+00		; <float> [#uses=1]
-	br label %bb1
-
-bb1:		; preds = %bb, %entry
-	%f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ]		; <float> [#uses=1]
-	%2 = fmul float %f_addr.0, %g		; <float> [#uses=1]
-; CHECK: fmul float %f_addr
-; CHECK: fmul float %f_addr
-; CHECK: fmul float %f_addr
-; CHECK: fmul float %f_addr
-
-	ret float %2
-}
-
-define void @foo(float* %p) nounwind {
-entry:
-	%0 = load float* %p, align 4		; <float> [#uses=1]
-	%1 = getelementptr float* %p, i32 1		; <float*> [#uses=1]
-	%2 = load float* %1, align 4		; <float> [#uses=1]
-	%3 = getelementptr float* %p, i32 2		; <float*> [#uses=1]
-	%4 = load float* %3, align 4		; <float> [#uses=1]
-	%5 = getelementptr float* %p, i32 3		; <float*> [#uses=1]
-	%6 = load float* %5, align 4		; <float> [#uses=1]
-	%7 = getelementptr float* %p, i32 4		; <float*> [#uses=1]
-	%8 = load float* %7, align 4		; <float> [#uses=1]
-	%9 = getelementptr float* %p, i32 5		; <float*> [#uses=1]
-	%10 = load float* %9, align 4		; <float> [#uses=1]
-	%11 = tail call float @__half_powrf4(float %0, float %6) nounwind		; <float> [#uses=1]
-	%12 = tail call float @__half_powrf4(float %2, float %8) nounwind		; <float> [#uses=1]
-	%13 = tail call float @__half_powrf4(float %4, float %10) nounwind		; <float> [#uses=1]
-	%14 = getelementptr float* %p, i32 6		; <float*> [#uses=1]
-	store float %11, float* %14, align 4
-	%15 = getelementptr float* %p, i32 7		; <float*> [#uses=1]
-	store float %12, float* %15, align 4
-	%16 = getelementptr float* %p, i32 8		; <float*> [#uses=1]
-	store float %13, float* %16, align 4
-	ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/iprintf.ll b/test/Transforms/SimplifyLibCalls/iprintf.ll
new file mode 100644
index 0000000..7f036fe
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/iprintf.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -simplify-libcalls -S -o %t
+; RUN: FileCheck < %t %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "xcore-xmos-elf"
+
+@.str = internal constant [4 x i8] c"%f\0A\00"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
+
+; Verify printf with no floating point arguments is transformed to iprintf
+define i32 @f0(i32 %x) nounwind {
+entry:
+; CHECK: define i32 @f0
+; CHECK: @iprintf
+; CHECK: }
+	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)		; <i32> [#uses=0]
+	ret i32 %0
+}
+
+; Verify we don't turn this into an iprintf call
+define void @f1(double %x) nounwind {
+entry:
+; CHECK: define void @f1
+; CHECK: @printf
+; CHECK: }
+	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x) nounwind		; <i32> [#uses=0]
+	ret void
+}
+
+; Verify sprintf with no floating point arguments is transformed to siprintf
+define i32 @f2(i8* %p, i32 %x) nounwind {
+entry:
+; CHECK: define i32 @f2
+; CHECK: @siprintf
+; CHECK: }
+	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
+	ret i32 %0
+}
+
+; Verify we don't turn this into an siprintf call
+define i32 @f3(i8* %p, double %x) nounwind {
+entry:
+; CHECK: define i32 @f3
+; CHECK: @sprintf
+; CHECK: }
+	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
+	ret i32 %0
+}
+
+; Verify fprintf with no floating point arguments is transformed to fiprintf
+define i32 @f4(i8* %p, i32 %x) nounwind {
+entry:
+; CHECK: define i32 @f4
+; CHECK: @fiprintf
+; CHECK: }
+	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
+	ret i32 %0
+}
+
+; Verify we don't turn this into an fiprintf call
+define i32 @f5(i8* %p, double %x) nounwind {
+entry:
+; CHECK: define i32 @f5
+; CHECK: @fprintf
+; CHECK: }
+	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
+	ret i32 %0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @sprintf(i8* nocapture, i8* nocapture, ...) nounwind
+declare i32 @fprintf(i8* nocapture, i8* nocapture, ...) nounwind
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 2f37911..c9072a7 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -38,6 +38,7 @@ add_subdirectory(llvm-extract)
 add_subdirectory(llvm-diff)
 add_subdirectory(macho-dump)
 add_subdirectory(llvm-objdump)
+add_subdirectory(llvm-rtdyld)
 
 add_subdirectory(bugpoint)
 add_subdirectory(bugpoint-passes)
@@ -46,6 +47,17 @@ add_subdirectory(llvm-stub)
 add_subdirectory(edis)
 add_subdirectory(llvmc)
 
+if( NOT WIN32 )
+  add_subdirectory(lto)
+endif()
+
+if( LLVM_ENABLE_PIC )
+  # TODO: support other systems:
+  if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
+    add_subdirectory(gold)
+  endif()
+endif()
+
 if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
   add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/clang )
 endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
diff --git a/tools/Makefile b/tools/Makefile
index 7310247..5960433 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -13,19 +13,20 @@ LEVEL := ..
 OPTIONAL_PARALLEL_DIRS := clang
 
 # Build LLDB if present. Note LLDB must be built last as it depends on the
-# wider LLVM infrastructure (including Clang). 
+# wider LLVM infrastructure (including Clang).
 OPTIONAL_DIRS := lldb
 
 # NOTE: The tools are organized into five groups of four consisting of one
 # large and three small executables. This is done to minimize memory load
 # in parallel builds.  Please retain this ordering.
-DIRS := llvm-config 
+DIRS := llvm-config
 PARALLEL_DIRS := opt llvm-as llvm-dis \
                  llc llvm-ranlib llvm-ar llvm-nm \
                  llvm-ld llvm-prof llvm-link \
                  lli llvm-extract llvm-mc \
                  bugpoint llvm-bcanalyzer llvm-stub \
-                 llvmc llvm-diff macho-dump llvm-objdump
+                 llvmc llvm-diff macho-dump llvm-objdump \
+	         llvm-rtdyld
 
 # Let users override the set of tools to build from the command line.
 ifdef ONLY_TOOLS
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 1cbf632..aa3e290 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -71,7 +71,7 @@ BugDriver::BugDriver(const char *toolname, bool find_bugs,
                      LLVMContext& ctxt)
   : Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
     Program(0), Interpreter(0), SafeInterpreter(0), gcc(0),
-    run_find_bugs(find_bugs), Timeout(timeout), 
+    run_find_bugs(find_bugs), Timeout(timeout),
     MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
 
 BugDriver::~BugDriver() {
@@ -97,7 +97,7 @@ Module *llvm::ParseInputFile(const std::string &Filename,
 
       if (TheTriple.getTriple().empty())
         TheTriple.setTriple(sys::getHostTriple());
-        
+
       TargetTriple.setTriple(TheTriple.getTriple());
     }
 
@@ -118,7 +118,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
   // Load the first input file.
   Program = ParseInputFile(Filenames[0], Context);
   if (Program == 0) return true;
-    
+
   outs() << "Read input file      : '" << Filenames[0] << "'\n";
 
   for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
@@ -152,12 +152,12 @@ bool BugDriver::run(std::string &ErrMsg) {
     return runManyPasses(PassesToRun, ErrMsg);
   }
 
-  // If we're not running as a child, the first thing that we must do is 
-  // determine what the problem is. Does the optimization series crash the 
-  // compiler, or does it produce illegal code?  We make the top-level 
-  // decision by trying to run all of the passes on the the input program, 
-  // which should generate a bitcode file.  If it does generate a bitcode 
-  // file, then we know the compiler didn't crash, so try to diagnose a 
+  // If we're not running as a child, the first thing that we must do is
+  // determine what the problem is. Does the optimization series crash the
+  // compiler, or does it produce illegal code?  We make the top-level
+  // decision by trying to run all of the passes on the the input program,
+  // which should generate a bitcode file.  If it does generate a bitcode
+  // file, then we know the compiler didn't crash, so try to diagnose a
   // miscompilation.
   if (!PassesToRun.empty()) {
     outs() << "Running selected passes on program to test for crash: ";
@@ -194,10 +194,10 @@ bool BugDriver::run(std::string &ErrMsg) {
   // Make sure the reference output file gets deleted on exit from this
   // function, if appropriate.
   sys::Path ROF(ReferenceOutputFile);
-  FileRemover RemoverInstance(ROF, CreatedOutput && !SaveTemps);
+  FileRemover RemoverInstance(ROF.str(), CreatedOutput && !SaveTemps);
 
   // Diff the output of the raw program against the reference output.  If it
-  // matches, then we assume there is a miscompilation bug and try to 
+  // matches, then we assume there is a miscompilation bug and try to
   // diagnose it.
   outs() << "*** Checking the code generator...\n";
   bool Diff = diffProgram(Program, "", "", false, &Error);
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index f1601cd..9be9dfd 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -323,7 +323,7 @@ void BugDriver::compileProgram(Module *M, std::string *Error) const {
   }
 
   // Remove the temporary bitcode file when we are done.
-  FileRemover BitcodeFileRemover(BitcodeFile, !SaveTemps);
+  FileRemover BitcodeFileRemover(BitcodeFile.str(), !SaveTemps);
 
   // Actually compile the program!
   Interpreter->compileProgram(BitcodeFile.str(), Error, Timeout, MemoryLimit);
@@ -364,7 +364,8 @@ std::string BugDriver::executeProgram(const Module *Program,
 
   // Remove the temporary bitcode file when we are done.
   sys::Path BitcodePath(BitcodeFile);
-  FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
+  FileRemover BitcodeFileRemover(BitcodePath.str(),
+    CreatedBitcode && !SaveTemps);
 
   if (OutputFile.empty()) OutputFile = OutputPrefix + "-execution-output";
 
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 3a5f143..a9db38f 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -34,12 +34,12 @@ namespace llvm {
 }
 
 namespace {
-  static llvm::cl::opt<bool> 
-    DisableLoopExtraction("disable-loop-extraction", 
+  static llvm::cl::opt<bool>
+    DisableLoopExtraction("disable-loop-extraction",
         cl::desc("Don't extract loops when searching for miscompilations"),
         cl::init(false));
-  static llvm::cl::opt<bool> 
-    DisableBlockExtraction("disable-block-extraction", 
+  static llvm::cl::opt<bool>
+    DisableBlockExtraction("disable-block-extraction",
         cl::desc("Don't extract blocks when searching for miscompilations"),
         cl::init(false));
 
@@ -75,7 +75,7 @@ ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
     BD.EmitProgressBitcode(BD.getProgram(), "pass-error",  false);
     exit(BD.debugOptimizerCrash());
   }
-  
+
   // Check to see if the finished program matches the reference output...
   bool Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
                              true /*delete bitcode*/, &Error);
@@ -309,7 +309,7 @@ static bool ExtractLoops(BugDriver &BD,
   bool MadeChange = false;
   while (1) {
     if (BugpointIsInterrupted) return MadeChange;
-    
+
     ValueToValueMapTy VMap;
     Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
     Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
@@ -354,7 +354,7 @@ static bool ExtractLoops(BugDriver &BD,
       BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to-le.bc",
                             ToOptimizeLoopExtracted);
 
-      errs() << "Please submit the " 
+      errs() << "Please submit the "
              << OutputPrefix << "-loop-extract-fail-*.bc files.\n";
       delete ToOptimize;
       delete ToNotOptimize;
@@ -409,9 +409,9 @@ static bool ExtractLoops(BugDriver &BD,
     MiscompiledFunctions.clear();
     for (unsigned i = 0, e = MisCompFunctions.size(); i != e; ++i) {
       Function *NewF = ToNotOptimize->getFunction(MisCompFunctions[i].first);
-                                                  
+
       assert(NewF && "Function not found??");
-      assert(NewF->getFunctionType() == MisCompFunctions[i].second && 
+      assert(NewF->getFunctionType() == MisCompFunctions[i].second &&
              "found wrong function type?");
       MiscompiledFunctions.push_back(NewF);
     }
@@ -523,7 +523,7 @@ static bool ExtractBlocks(BugDriver &BD,
                           std::vector<Function*> &MiscompiledFunctions,
                           std::string &Error) {
   if (BugpointIsInterrupted) return false;
-  
+
   std::vector<BasicBlock*> Blocks;
   for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
     for (Function::iterator I = MiscompiledFunctions[i]->begin(),
@@ -593,7 +593,7 @@ static bool ExtractBlocks(BugDriver &BD,
   for (unsigned i = 0, e = MisCompFunctions.size(); i != e; ++i) {
     Function *NewF = ProgClone->getFunction(MisCompFunctions[i].first);
     assert(NewF && "Function not found??");
-    assert(NewF->getFunctionType() == MisCompFunctions[i].second && 
+    assert(NewF->getFunctionType() == MisCompFunctions[i].second &&
            "Function has wrong type??");
     MiscompiledFunctions.push_back(NewF);
   }
@@ -731,7 +731,7 @@ void BugDriver::debugMiscompilation(std::string *Error) {
          << getPassesString(getPassesToRun()) << '\n';
   EmitProgressBitcode(Program, "passinput");
 
-  std::vector<Function *> MiscompiledFunctions = 
+  std::vector<Function *> MiscompiledFunctions =
     DebugAMiscompilation(*this, TestOptimizer, *Error);
   if (!Error->empty())
     return;
@@ -845,7 +845,7 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
           // Create a new global to hold the cached function pointer.
           Constant *NullPtr = ConstantPointerNull::get(F->getType());
           GlobalVariable *Cache =
-            new GlobalVariable(*F->getParent(), F->getType(), 
+            new GlobalVariable(*F->getParent(), F->getType(),
                                false, GlobalValue::InternalLinkage,
                                NullPtr,F->getName()+".fpcache");
 
@@ -885,7 +885,7 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
           new StoreInst(CastedResolver, Cache, LookupBB);
           BranchInst::Create(DoCallBB, LookupBB);
 
-          PHINode *FuncPtr = PHINode::Create(NullPtr->getType(),
+          PHINode *FuncPtr = PHINode::Create(NullPtr->getType(), 2,
                                              "fp", DoCallBB);
           FuncPtr->addIncoming(CastedResolver, LookupBB);
           FuncPtr->addIncoming(CachedVal, EntryBB);
@@ -943,7 +943,7 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
   }
   delete Test;
 
-  FileRemover TestModuleBCRemover(TestModuleBC, !SaveTemps);
+  FileRemover TestModuleBCRemover(TestModuleBC.str(), !SaveTemps);
 
   // Make the shared library
   sys::Path SafeModuleBC("bugpoint.safe.bc");
@@ -959,14 +959,14 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
     exit(1);
   }
 
-  FileRemover SafeModuleBCRemover(SafeModuleBC, !SaveTemps);
+  FileRemover SafeModuleBCRemover(SafeModuleBC.str(), !SaveTemps);
 
   std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
   if (!Error.empty())
     return false;
   delete Safe;
 
-  FileRemover SharedObjectRemover(sys::Path(SharedObject), !SaveTemps);
+  FileRemover SharedObjectRemover(SharedObject, !SaveTemps);
 
   // Run the code generator on the `Test' code, loading the shared library.
   // The function returns whether or not the new output differs from reference.
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 37cc902..6c46ef1 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -503,7 +503,7 @@ int LLC::ExecuteProgram(const std::string &Bitcode,
   sys::Path OutputAsmFile;
   GCC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error, Timeout,
                                       MemoryLimit);
-  FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
+  FileRemover OutFileRemover(OutputAsmFile.str(), !SaveTemps);
 
   std::vector<std::string> GCCArgs(ArgsForGCC);
   GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
@@ -675,7 +675,7 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
   sys::Path OutputCFile;
   OutputCode(Bitcode, OutputCFile, *Error, Timeout, MemoryLimit);
 
-  FileRemover CFileRemove(OutputCFile, !SaveTemps);
+  FileRemover CFileRemove(OutputCFile.str(), !SaveTemps);
 
   std::vector<std::string> GCCArgs(ArgsForGCC);
   GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
@@ -758,8 +758,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
       // For ARM architectures we don't want this flag. bugpoint isn't
       // explicitly told what architecture it is working on, so we get
       // it from gcc flags
-      if ((TargetTriple.getOS() == Triple::Darwin) &&
-          !IsARMArchitecture(GCCArgs))
+      if (TargetTriple.isOSDarwin() && !IsARMArchitecture(GCCArgs))
         GCCArgs.push_back("-force_cpusubtype_ALL");
     }
   }
@@ -851,7 +850,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
         errs() << "\n";
         );
 
-  FileRemover OutputBinaryRemover(OutputBinary, !SaveTemps);
+  FileRemover OutputBinaryRemover(OutputBinary.str(), !SaveTemps);
 
   if (RemoteClientPath.isEmpty()) {
     DEBUG(errs() << "<run locally>");
@@ -900,7 +899,7 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
   GCCArgs.push_back("none");
   if (TargetTriple.getArch() == Triple::sparc)
     GCCArgs.push_back("-G");       // Compile a shared library, `-G' for Sparc
-  else if (TargetTriple.getOS() == Triple::Darwin) {
+  else if (TargetTriple.isOSDarwin()) {
     // link all source files into a single module in data segment, rather than
     // generating blocks. dynamic_lookup requires that you set
     // MACOSX_DEPLOYMENT_TARGET=10.3 in your env.  FIXME: it would be better for
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
index 5037f9f..9406c2a 100644
--- a/tools/edis/CMakeLists.txt
+++ b/tools/edis/CMakeLists.txt
@@ -1,11 +1,34 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
-add_llvm_library(EnhancedDisassembly
+set(SOURCES
   ../../include/llvm-c/EnhancedDisassembly.h
   EDMain.cpp
-)
+  )
 
-set_target_properties(EnhancedDisassembly
-  PROPERTIES
-  LINKER_LANGUAGE CXX)
+set(LLVM_LINK_COMPONENTS mcdisassembler)
+if( LLVM_TARGETS_TO_BUILD MATCHES X86 )
+  list(APPEND LLVM_LINK_COMPONENTS x86asmprinter x86disassembler)
+endif()
+if( LLVM_TARGETS_TO_BUILD MATCHES ARM )
+  list(APPEND LLVM_LINK_COMPONENTS armasmprinter armdisassembler)
+endif()
 
+# TODO: Process EnhancedDisassembly.exports
+
+if( NOT WIN32 AND LLVM_ENABLE_PIC )
+  set(bsl ${BUILD_SHARED_LIBS})
+  set(BUILD_SHARED_LIBS ON)
+  add_llvm_library(EnhancedDisassembly ${SOURCES})
+  set_property(TARGET EnhancedDisassembly PROPERTY
+    OUTPUT_NAME "EnhancedDisassembly")
+  set(BUILD_SHARED_LIBS ${bsl})
+  set(EnhancedDisassembly_STATIC_TARGET_NAME EnhancedDisassembly_static)
+else()
+  set(EnhancedDisassembly_STATIC_TARGET_NAME EnhancedDisassembly)
+endif()
+
+if( NOT BUILD_SHARED_LIBS )
+  add_llvm_library(${EnhancedDisassembly_STATIC_TARGET_NAME} ${SOURCES})
+  set_property(TARGET ${EnhancedDisassembly_STATIC_TARGET_NAME} PROPERTY
+    OUTPUT_NAME "EnhancedDisassembly")
+endif()
diff --git a/tools/gold/CMakeLists.txt b/tools/gold/CMakeLists.txt
new file mode 100644
index 0000000..d8633e6
--- /dev/null
+++ b/tools/gold/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(LLVM_BINUTILS_INCDIR "/usr/include" CACHE PATH
+  "PATH to binutils/include containing plugin-api.h for gold plugin.")
+
+if( NOT EXISTS "${LLVM_BINUTILS_INCDIR}/plugin-api.h" )
+  message(STATUS "plugin-api.h not found. gold plugin excluded from the build.")
+else()
+  include_directories( ${LLVM_BINUTILS_INCDIR} )
+
+  # Because off_t is used in the public API, the largefile parts are required for
+  # ABI compatibility.
+  add_definitions( -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 )
+
+  set(LLVM_LINK_COMPONENTS support)
+
+  add_llvm_loadable_module(LLVMgold
+    gold-plugin.cpp
+    )
+
+  # Makefile.rules contains a special cases for OpenBSD, Darwin and
+  # Windows. We restrict ourselves to Linux for the time being.
+  set(srcexp ${CMAKE_CURRENT_SOURCE_DIR}/gold.exports)
+  add_custom_command(OUTPUT exportsfile
+    COMMAND echo "{" > exportsfile
+    COMMAND grep -q "\\<" ${srcexp} && echo "  global:" >> exportsfile || :
+    COMMAND sed -e "s/$/;/" -e "s/^/    /" < ${srcexp} >> exportsfile
+    COMMAND echo "  local: *;" >> exportsfile
+    COMMAND echo "};" >> exportsfile
+    DEPENDS ${srcexp}
+    VERBATIM
+    COMMENT "Creating export file for gold plugin")
+  add_custom_target(gold_exports DEPENDS exportsfile)
+  set_property(DIRECTORY APPEND
+    PROPERTY ADDITIONAL_MAKE_CLEAN_FILES exportsfile)
+
+  # Force re-linking when the exports file changes. Actually, it
+  # forces recompilation of gold-plugin.cpp. The LINK_DEPENDS target
+  # property only works for makefile-based generators.
+  set_property(SOURCE gold-plugin.cpp APPEND PROPERTY
+    OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/exportsfile)
+
+  target_link_libraries(LLVMgold LTO -Wl,--version-script,exportsfile)
+  add_dependencies(LLVMgold gold_exports)
+endif()
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 7ce1760..dd66eae 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -50,6 +50,7 @@ namespace {
   ld_plugin_add_input_file add_input_file = NULL;
   ld_plugin_add_input_library add_input_library = NULL;
   ld_plugin_set_extra_library_path set_extra_library_path = NULL;
+  ld_plugin_get_view get_view = NULL;
   ld_plugin_message message = discard_message;
 
   int api_version = 0;
@@ -73,7 +74,6 @@ namespace options {
   static generate_bc generate_bc_file = BC_NO;
   static std::string bc_path;
   static std::string obj_path;
-  static std::vector<std::string> pass_through;
   static std::string extra_library_path;
   static std::string triple;
   static std::string mcpu;
@@ -96,9 +96,6 @@ namespace options {
       mcpu = opt.substr(strlen("mcpu="));
     } else if (opt.startswith("extra-library-path=")) {
       extra_library_path = opt.substr(strlen("extra_library_path="));
-    } else if (opt.startswith("pass-through=")) {
-      llvm::StringRef item = opt.substr(strlen("pass-through="));
-      pass_through.push_back(item.str());
     } else if (opt.startswith("mtriple=")) {
       triple = opt.substr(strlen("mtriple="));
     } else if (opt.startswith("obj-path=")) {
@@ -209,6 +206,9 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
       case LDPT_SET_EXTRA_LIBRARY_PATH:
         set_extra_library_path = tv->tv_u.tv_set_extra_library_path;
         break;
+      case LDPT_GET_VIEW:
+        get_view = tv->tv_u.tv_get_view;
+        break;
       case LDPT_MESSAGE:
         message = tv->tv_u.tv_message;
         break;
@@ -236,60 +236,23 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
                                         int *claimed) {
   lto_module_t M;
 
-  if (file->offset) {
-    // Gold has found what might be IR part-way inside of a file, such as
-    // an .a archive.
-    if (lseek(file->fd, file->offset, SEEK_SET) == -1) {
-      (*message)(LDPL_ERROR,
-                 "Failed to seek to archive member of %s at offset %d: %s\n",
-                 file->name,
-                 file->offset, sys::StrError(errno).c_str());
-      return LDPS_ERR;
-    }
-    void *buf = malloc(file->filesize);
-    if (!buf) {
-      (*message)(LDPL_ERROR,
-                 "Failed to allocate buffer for archive member of size: %d\n",
-                 file->filesize);
-      return LDPS_ERR;
-    }
-    if (read(file->fd, buf, file->filesize) != file->filesize) {
-      (*message)(LDPL_ERROR,
-                 "Failed to read archive member of %s at offset %d: %s\n",
-                 file->name,
-                 file->offset,
-                 sys::StrError(errno).c_str());
-      free(buf);
-      return LDPS_ERR;
-    }
-    if (!lto_module_is_object_file_in_memory(buf, file->filesize)) {
-      free(buf);
-      return LDPS_OK;
-    }
-    M = lto_module_create_from_memory(buf, file->filesize);
-    if (!M) {
-      (*message)(LDPL_ERROR, "Failed to create LLVM module: %s",
-                 lto_get_error_message());
+  if (get_view) {
+    const void *view;
+    if (get_view(file->handle, &view) != LDPS_OK) {
+      (*message)(LDPL_ERROR, "Failed to get a view of %s", file->name);
       return LDPS_ERR;
     }
-    free(buf);
+    M = lto_module_create_from_memory(view, file->filesize);
+  } else if (file->offset) {
+    // Gold has found what might be IR part-way inside of a file, such as
+    // an .a archive.
+    M = lto_module_create_from_fd_at_offset(file->fd, file->name, -1,
+                                            file->filesize, file->offset);
   } else {
-    // FIXME: We should not need to pass -1 as the file size, but there
-    // is a bug in BFD that causes it to pass 0 to us. Remove this once
-    // that is fixed.
-    off_t size = file->filesize ? file->filesize : -1;
-
-    // FIXME: We should not need to reset the position in the file, but there
-    // is a bug in BFD. Remove this once that is fixed.
-    off_t old_pos = lseek(file->fd, 0, SEEK_CUR);
-
-    lseek(file->fd, 0, SEEK_SET);
-    M = lto_module_create_from_fd(file->fd, file->name, size);
-
-    lseek(file->fd, old_pos, SEEK_SET);
-    if (!M)
-      return LDPS_OK;
+    M = lto_module_create_from_fd(file->fd, file->name, file->filesize);
   }
+  if (!M)
+    return LDPS_OK;
 
   *claimed = 1;
   Modules.resize(Modules.size() + 1);
@@ -446,38 +409,10 @@ static ld_plugin_status all_symbols_read_hook(void) {
       exit(0);
   }
   size_t bufsize = 0;
-  const char *buffer = static_cast<const char *>(lto_codegen_compile(code_gen,
-                                                                     &bufsize));
-
-  std::string ErrMsg;
-
   const char *objPath;
-  sys::Path uniqueObjPath("/tmp/llvmgold.o");
-  if (!options::obj_path.empty()) {
-    objPath = options::obj_path.c_str();
-  } else {
-    if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) {
-      (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
-      return LDPS_ERR;
-    }
-    objPath = uniqueObjPath.c_str();
-  }
-  tool_output_file objFile(objPath, ErrMsg,
-                             raw_fd_ostream::F_Binary);
-    if (!ErrMsg.empty()) {
-      (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
-      return LDPS_ERR;
-    }
-
-  objFile.os().write(buffer, bufsize);
-  objFile.os().close();
-  if (objFile.os().has_error()) {
-    (*message)(LDPL_ERROR, "Error writing output file '%s'",
-               objPath);
-    objFile.os().clear_error();
-    return LDPS_ERR;
+  if (lto_codegen_compile_to_file(code_gen, &objPath)) {
+    (*message)(LDPL_ERROR, "Could not produce a combined object file\n");
   }
-  objFile.keep();
 
   lto_codegen_dispose(code_gen);
   for (std::list<claimed_file>::iterator I = Modules.begin(),
@@ -500,24 +435,6 @@ static ld_plugin_status all_symbols_read_hook(void) {
     return LDPS_ERR;
   }
 
-  for (std::vector<std::string>::iterator i = options::pass_through.begin(),
-                                          e = options::pass_through.end();
-       i != e; ++i) {
-    std::string &item = *i;
-    const char *item_p = item.c_str();
-    if (llvm::StringRef(item).startswith("-l")) {
-      if (add_input_library(item_p + 2) != LDPS_OK) {
-        (*message)(LDPL_ERROR, "Unable to add library to the link.");
-        return LDPS_ERR;
-      }
-    } else {
-      if (add_input_file(item_p) != LDPS_OK) {
-        (*message)(LDPL_ERROR, "Unable to add .o file to the link.");
-        return LDPS_ERR;
-      }
-    }
-  }
-
   if (options::obj_path.empty())
     Cleanup.push_back(sys::Path(objPath));
 
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index bb426a9..a36b6d7 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -99,6 +99,9 @@ cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
 cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
                             cl::desc("Do not use .loc entries"));
 
+cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
+                         cl::desc("Do not use .cfi_* directives"));
+
 static cl::opt<bool>
 DisableRedZone("disable-red-zone",
   cl::desc("Do not emit code that uses the red zone."),
@@ -207,7 +210,7 @@ int main(int argc, char **argv) {
   InitializeAllAsmParsers();
 
   cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
-  
+
   // Load the module to be compiled...
   SMDiagnostic Err;
   std::auto_ptr<Module> M;
@@ -271,25 +274,21 @@ int main(int argc, char **argv) {
     FeaturesStr = Features.getString();
   }
 
-  std::auto_ptr<TargetMachine> 
+  std::auto_ptr<TargetMachine>
     target(TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr));
   assert(target.get() && "Could not allocate target machine!");
   TargetMachine &Target = *target.get();
 
   if (DisableDotLoc)
     Target.setMCUseLoc(false);
-  if (TheTriple.getOS() == Triple::Darwin) {
-    switch (TheTriple.getDarwinMajorNumber()) {
-    case 7:
-    case 8:
-    case 9:
-      // disable .loc support for older darwin OS.
-      Target.setMCUseLoc(false);
-      break;
-    default:
-      break;
-    }
-  }
+
+  if (DisableCFI)
+    Target.setMCUseCFI(false);
+
+  // Disable .loc support for older OS X versions.
+  if (TheTriple.isMacOSX() &&
+      TheTriple.isMacOSXVersionLT(10, 6))
+    Target.setMCUseLoc(false);
 
   // Figure out where we are going to send the output...
   OwningPtr<tool_output_file> Out
@@ -338,6 +337,9 @@ int main(int argc, char **argv) {
       return 1;
     }
 
+    // Before executing passes, print the final values of the LLVM options.
+    cl::PrintOptionValues();
+
     PM.run(mod);
   }
 
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index a756459..014925c 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -133,6 +133,7 @@ int main(int argc, char **argv, char * const *envp) {
   // If we have a native target, initialize it to ensure it is linked in and
   // usable by the JIT.
   InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
 
   cl::ParseCommandLineOptions(argc, argv,
                               "llvm interpreter & dynamic compiler\n");
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 980f278..9a92671 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -471,11 +471,11 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
 }
 
 static void PrintSize(double Bits) {
-  fprintf(stderr, "%.2f/%.2fB/%lluW", Bits, Bits/8,(unsigned long long)Bits/32);
+  fprintf(stderr, "%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32));
 }
 static void PrintSize(uint64_t Bits) {
-  fprintf(stderr, "%llub/%.2fB/%lluW", (unsigned long long)Bits,
-          (double)Bits/8, (unsigned long long)Bits/32);
+  fprintf(stderr, "%lub/%.2fB/%luW", (unsigned long)Bits,
+          (double)Bits/8, (unsigned long)(Bits/32));
 }
 
 
@@ -601,8 +601,8 @@ static int AnalyzeBitcode() {
       for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
         const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
 
-        fprintf(stderr, "\t\t%7d %9llu ", RecStats.NumInstances,
-                (unsigned long long)RecStats.TotalBits);
+        fprintf(stderr, "\t\t%7d %9lu ", RecStats.NumInstances,
+                (unsigned long)RecStats.TotalBits);
 
         if (RecStats.NumAbbrev)
           fprintf(stderr, "%7.2f  ",
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index b6f4289..bc23a64 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -7,7 +7,7 @@ endif( NOT PERL_FOUND )
 
 set(PERL ${PERL_EXECUTABLE})
 set(VERSION PACKAGE_VERSION)
-set(PREFIX ${LLVM_BINARY_DIR}) # TODO: Root for `make install'.
+set(PREFIX ${CMAKE_INSTALL_PREFIX})
 set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
 set(abs_top_builddir ${LLVM_BINARY_DIR})
 execute_process(COMMAND date
@@ -27,6 +27,7 @@ endif( IS_BIG_ENDIAN )
 set(SHLIBEXT ${LTDL_SHLIB_EXT})
 #EXEEXT already set.
 set(OS "${CMAKE_SYSTEM}")
+set(target "${TARGET_TRIPLE}")
 set(ARCH "${LLVM_NATIVE_ARCH}")
 
 get_system_libs(LLVM_SYSTEM_LIBS_LIST)
@@ -54,9 +55,9 @@ configure_file(
   @ONLY
 )
 
-set(LIBDEPS ${CMAKE_CURRENT_BINARY_DIR}/LibDeps.txt)
-set(LIBDEPS_TMP ${CMAKE_CURRENT_BINARY_DIR}/LibDeps.txt.tmp)
-set(FINAL_LIBDEPS ${CMAKE_CURRENT_BINARY_DIR}/FinalLibDeps.txt)
+set(LIBDEPS LibDeps.txt)
+set(LIBDEPS_TMP LibDeps.txt.tmp)
+set(FINAL_LIBDEPS FinalLibDeps.txt)
 set(LLVM_CONFIG ${LLVM_TOOLS_BINARY_DIR}/llvm-config)
 set(LLVM_CONFIG_IN ${CMAKE_CURRENT_BINARY_DIR}/llvm-config.in)
 
@@ -90,9 +91,9 @@ add_custom_command(OUTPUT ${FINAL_LIBDEPS}
   DEPENDS ${LIBDEPS}
   COMMENT "Checking for cyclic dependencies between LLVM libraries.")
 
-set(C_FLGS "${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-set(CXX_FLGS "${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-set(CPP_FLGS "${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
+set(C_FLGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
+set(CXX_FLGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
+set(CPP_FLGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
 
 # We don't want certain flags on the output of
 # llvm-config --cflags --cxxflags
@@ -106,17 +107,18 @@ remove_option_from_llvm_config("-Wall")
 remove_option_from_llvm_config("-W")
 
 add_custom_command(OUTPUT ${LLVM_CONFIG}
-  COMMAND echo 's!@LLVM_CPPFLAGS@!${CPP_FLGS}!' > temp.sed
-  COMMAND echo 's!@LLVM_CFLAGS@!${C_FLGS}!' >> temp.sed
-  COMMAND echo 's!@LLVM_CXXFLAGS@!${CXX_FLGS}!' >> temp.sed
+  COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! > temp.sed
+  COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> temp.sed
+  COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> temp.sed
   # TODO: Use general flags for linking! not just for shared libs:
-  COMMAND echo 's!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}!' >> temp.sed
-  COMMAND echo 's!@LIBS@!${LLVM_SYSTEM_LIBS}!' >> temp.sed
-  COMMAND echo 's!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}!' >> temp.sed
+  COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> temp.sed
+  COMMAND echo s!@LIBS@!${LLVM_SYSTEM_LIBS}! >> temp.sed
+  COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> temp.sed
   COMMAND sed -f temp.sed < ${LLVM_CONFIG_IN} > ${LLVM_CONFIG}
   COMMAND ${CMAKE_COMMAND} -E remove -f temp.sed
   COMMAND cat ${FINAL_LIBDEPS} >> ${LLVM_CONFIG}
   COMMAND chmod +x ${LLVM_CONFIG}
+  VERBATIM
   DEPENDS ${FINAL_LIBDEPS} ${LLVM_CONFIG_IN}
   COMMENT "Building llvm-config script."
   )
@@ -140,7 +142,7 @@ install(FILES ${LLVM_CONFIG}
 
 # Regeneration of library dependencies.
 
-# See the comments at the end of cmake/modules/LLVMConfig.cmake for
+# See the comments at the end of cmake/modules/LLVM-Config.cmake for
 # notes and guidelines.
 
 set(LLVMLibDeps ${LLVM_MAIN_SRC_DIR}/cmake/modules/LLVMLibDeps.cmake)
diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt
index f6d65c9..c59d69e 100644
--- a/tools/llvm-diff/CMakeLists.txt
+++ b/tools/llvm-diff/CMakeLists.txt
@@ -2,5 +2,7 @@ set(LLVM_LINK_COMPONENTS support asmparser bitreader)
 
 add_llvm_tool(llvm-diff
   llvm-diff.cpp
+  DiffConsumer.cpp
+  DiffLog.cpp
   DifferenceEngine.cpp
   )
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
new file mode 100644
index 0000000..c23e8fb
--- /dev/null
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -0,0 +1,209 @@
+//===-- DiffConsumer.cpp - Difference Consumer ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This files implements the the LLVM difference Consumer
+//
+//===----------------------------------------------------------------------===//
+
+#include "DiffConsumer.h"
+
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+static void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering){
+  unsigned IN = 0;
+
+  // Arguments get the first numbers.
+  for (Function::arg_iterator
+         AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
+    if (!AI->hasName())
+      Numbering[&*AI] = IN++;
+
+  // Walk the basic blocks in order.
+  for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
+    if (!FI->hasName())
+      Numbering[&*FI] = IN++;
+
+    // Walk the instructions in order.
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
+      // void instructions don't get numbers.
+      if (!BI->hasName() && !BI->getType()->isVoidTy())
+        Numbering[&*BI] = IN++;
+  }
+
+  assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
+}
+
+
+void DiffConsumer::printValue(Value *V, bool isL) {
+  if (V->hasName()) {
+    out << (isa<GlobalValue>(V) ? '@' : '%') << V->getName();
+    return;
+  }
+  if (V->getType()->isVoidTy()) {
+    if (isa<StoreInst>(V)) {
+      out << "store to ";
+      printValue(cast<StoreInst>(V)->getPointerOperand(), isL);
+    } else if (isa<CallInst>(V)) {
+      out << "call to ";
+      printValue(cast<CallInst>(V)->getCalledValue(), isL);
+    } else if (isa<InvokeInst>(V)) {
+      out << "invoke to ";
+      printValue(cast<InvokeInst>(V)->getCalledValue(), isL);
+    } else {
+      out << *V;
+    }
+    return;
+  }
+
+  unsigned N = contexts.size();
+  while (N > 0) {
+    --N;
+    DiffContext &ctxt = contexts[N];
+    if (!ctxt.IsFunction) continue;
+    if (isL) {
+      if (ctxt.LNumbering.empty())
+        ComputeNumbering(cast<Function>(ctxt.L), ctxt.LNumbering);
+      out << '%' << ctxt.LNumbering[V];
+      return;
+    } else {
+      if (ctxt.RNumbering.empty())
+        ComputeNumbering(cast<Function>(ctxt.R), ctxt.RNumbering);
+      out << '%' << ctxt.RNumbering[V];
+      return;
+    }
+  }
+
+  out << "<anonymous>";
+}
+
+void DiffConsumer::header() {
+  if (contexts.empty()) return;
+  for (SmallVectorImpl<DiffContext>::iterator
+         I = contexts.begin(), E = contexts.end(); I != E; ++I) {
+    if (I->Differences) continue;
+    if (isa<Function>(I->L)) {
+      // Extra newline between functions.
+      if (Differences) out << "\n";
+
+      Function *L = cast<Function>(I->L);
+      Function *R = cast<Function>(I->R);
+      if (L->getName() != R->getName())
+        out << "in function " << L->getName()
+            << " / " << R->getName() << ":\n";
+      else
+        out << "in function " << L->getName() << ":\n";
+    } else if (isa<BasicBlock>(I->L)) {
+      BasicBlock *L = cast<BasicBlock>(I->L);
+      BasicBlock *R = cast<BasicBlock>(I->R);
+      if (L->hasName() && R->hasName() && L->getName() == R->getName())
+        out << "  in block %" << L->getName() << ":\n";
+      else {
+        out << "  in block ";
+        printValue(L, true);
+        out << " / ";
+        printValue(R, false);
+        out << ":\n";
+      }
+    } else if (isa<Instruction>(I->L)) {
+      out << "    in instruction ";
+      printValue(I->L, true);
+      out << " / ";
+      printValue(I->R, false);
+      out << ":\n";
+    }
+
+    I->Differences = true;
+  }
+}
+
+void DiffConsumer::indent() {
+  unsigned N = Indent;
+  while (N--) out << ' ';
+}
+
+bool DiffConsumer::hadDifferences() const {
+  return Differences;
+}
+
+void DiffConsumer::enterContext(Value *L, Value *R) {
+  contexts.push_back(DiffContext(L, R));
+  Indent += 2;
+}
+
+void DiffConsumer::exitContext() {
+  Differences |= contexts.back().Differences;
+  contexts.pop_back();
+  Indent -= 2;
+}
+
+void DiffConsumer::log(StringRef text) {
+  header();
+  indent();
+  out << text << '\n';
+}
+
+void DiffConsumer::logf(const LogBuilder &Log) {
+  header();
+  indent();
+
+  unsigned arg = 0;
+
+  StringRef format = Log.getFormat();
+  while (true) {
+    size_t percent = format.find('%');
+    if (percent == StringRef::npos) {
+      out << format;
+      break;
+    }
+    assert(format[percent] == '%');
+
+    if (percent > 0) out << format.substr(0, percent);
+
+    switch (format[percent+1]) {
+    case '%': out << '%'; break;
+    case 'l': printValue(Log.getArgument(arg++), true); break;
+    case 'r': printValue(Log.getArgument(arg++), false); break;
+    default: llvm_unreachable("unknown format character");
+    }
+
+    format = format.substr(percent+2);
+  }
+
+  out << '\n';
+}
+
+void DiffConsumer::logd(const DiffLogBuilder &Log) {
+  header();
+
+  for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) {
+    indent();
+    switch (Log.getLineKind(I)) {
+    case DC_match:
+      out << "  ";
+      Log.getLeft(I)->dump();
+      //printValue(Log.getLeft(I), true);
+      break;
+    case DC_left:
+      out << "< ";
+      Log.getLeft(I)->dump();
+      //printValue(Log.getLeft(I), true);
+      break;
+    case DC_right:
+      out << "> ";
+      Log.getRight(I)->dump();
+      //printValue(Log.getRight(I), false);
+      break;
+    }
+    //out << "\n";
+  }
+}
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
new file mode 100644
index 0000000..b95d427
--- /dev/null
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -0,0 +1,92 @@
+//===-- DiffConsumer.h - Difference Consumer --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference Consumer
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LLVM_DIFFCONSUMER_H_
+#define _LLVM_DIFFCONSUMER_H_
+
+#include "DiffLog.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+  class Module;
+  class Value;
+  class Function;
+
+  /// The interface for consumers of difference data.
+  class Consumer {
+  public:
+    /// Record that a local context has been entered.  Left and
+    /// Right are IR "containers" of some sort which are being
+    /// considered for structural equivalence: global variables,
+    /// functions, blocks, instructions, etc.
+    virtual void enterContext(Value *Left, Value *Right) = 0;
+
+    /// Record that a local context has been exited.
+    virtual void exitContext() = 0;
+
+    /// Record a difference within the current context.
+    virtual void log(StringRef Text) = 0;
+
+    /// Record a formatted difference within the current context.
+    virtual void logf(const LogBuilder &Log) = 0;
+
+    /// Record a line-by-line instruction diff.
+    virtual void logd(const DiffLogBuilder &Log) = 0;
+
+  protected:
+    virtual ~Consumer() {}
+  };
+
+  class DiffConsumer : public Consumer {
+  private:
+    struct DiffContext {
+      DiffContext(Value *L, Value *R)
+        : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
+      Value *L;
+      Value *R;
+      bool Differences;
+      bool IsFunction;
+      DenseMap<Value*,unsigned> LNumbering;
+      DenseMap<Value*,unsigned> RNumbering;
+    };
+
+    raw_ostream &out;
+    Module *LModule;
+    Module *RModule;
+    SmallVector<DiffContext, 5> contexts;
+    bool Differences;
+    unsigned Indent;
+
+    void printValue(Value *V, bool isL);
+    void header();
+    void indent();
+
+  public:
+    DiffConsumer(Module *L, Module *R)
+      : out(errs()), LModule(L), RModule(R), Differences(false), Indent(0) {}
+
+    bool hadDifferences() const;
+    void enterContext(Value *L, Value *R);
+    void exitContext();
+    void log(StringRef text);
+    void logf(const LogBuilder &Log);
+    void logd(const DiffLogBuilder &Log);
+  };
+}
+
+#endif
diff --git a/tools/llvm-diff/DiffLog.cpp b/tools/llvm-diff/DiffLog.cpp
new file mode 100644
index 0000000..9cc0c88
--- /dev/null
+++ b/tools/llvm-diff/DiffLog.cpp
@@ -0,0 +1,53 @@
+//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference log builder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DiffLog.h"
+#include "DiffConsumer.h"
+
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+LogBuilder::~LogBuilder() {
+  consumer.logf(*this);
+}
+
+StringRef LogBuilder::getFormat() const { return Format; }
+
+unsigned LogBuilder::getNumArguments() const { return Arguments.size(); }
+Value *LogBuilder::getArgument(unsigned I) const { return Arguments[I]; }
+
+DiffLogBuilder::~DiffLogBuilder() { consumer.logd(*this); }
+
+void DiffLogBuilder::addMatch(Instruction *L, Instruction *R) {
+  Diff.push_back(DiffRecord(L, R));
+}
+void DiffLogBuilder::addLeft(Instruction *L) {
+  // HACK: VS 2010 has a bug in the stdlib that requires this.
+  Diff.push_back(DiffRecord(L, DiffRecord::second_type(0)));
+}
+void DiffLogBuilder::addRight(Instruction *R) {
+  // HACK: VS 2010 has a bug in the stdlib that requires this.
+  Diff.push_back(DiffRecord(DiffRecord::first_type(0), R));
+}
+
+unsigned DiffLogBuilder::getNumLines() const { return Diff.size(); }
+
+DiffChange DiffLogBuilder::getLineKind(unsigned I) const {
+  return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left)
+                        : DC_right);
+}
+Instruction *DiffLogBuilder::getLeft(unsigned I) const { return Diff[I].first; }
+Instruction *DiffLogBuilder::getRight(unsigned I) const { return Diff[I].second; }
diff --git a/tools/llvm-diff/DiffLog.h b/tools/llvm-diff/DiffLog.h
new file mode 100644
index 0000000..43e318a
--- /dev/null
+++ b/tools/llvm-diff/DiffLog.h
@@ -0,0 +1,80 @@
+//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference log builder.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LLVM_DIFFLOG_H_
+#define _LLVM_DIFFLOG_H_
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class Instruction;
+  class Value;
+  class Consumer;
+
+  /// Trichotomy assumption
+  enum DiffChange { DC_match, DC_left, DC_right };
+
+  /// A temporary-object class for building up log messages.
+  class LogBuilder {
+    Consumer &consumer;
+
+    /// The use of a stored StringRef here is okay because
+    /// LogBuilder should be used only as a temporary, and as a
+    /// temporary it will be destructed before whatever temporary
+    /// might be initializing this format.
+    StringRef Format;
+
+    SmallVector<Value*, 4> Arguments;
+
+  public:
+    LogBuilder(Consumer &c, StringRef Format)
+      : consumer(c), Format(Format) {}
+
+    LogBuilder &operator<<(Value *V) {
+      Arguments.push_back(V);
+      return *this;
+    }
+
+    ~LogBuilder();
+
+    StringRef getFormat() const;
+    unsigned getNumArguments() const;
+    Value *getArgument(unsigned I) const;
+  };
+
+  /// A temporary-object class for building up diff messages.
+  class DiffLogBuilder {
+    typedef std::pair<Instruction*,Instruction*> DiffRecord;
+    SmallVector<DiffRecord, 20> Diff;
+
+    Consumer &consumer;
+
+  public:
+    DiffLogBuilder(Consumer &c) : consumer(c) {}
+    ~DiffLogBuilder();
+
+    void addMatch(Instruction *L, Instruction *R);
+    // HACK: VS 2010 has a bug in the stdlib that requires this.
+    void addLeft(Instruction *L);
+    void addRight(Instruction *R);
+
+    unsigned getNumLines() const;
+    DiffChange getLineKind(unsigned I) const;
+    Instruction *getLeft(unsigned I) const;
+    Instruction *getRight(unsigned I) const;
+  };
+
+}
+
+#endif
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index b0a24d0..ba2cec2 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -14,6 +14,7 @@
 
 #include "DifferenceEngine.h"
 
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
@@ -266,7 +267,7 @@ class FunctionDifferenceEngine {
     } else if (isa<PHINode>(L)) {
       // FIXME: implement.
 
-      // This is really wierd;  type uniquing is broken?
+      // This is really weird;  type uniquing is broken?
       if (L->getType() != R->getType()) {
         if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
           if (Complain) Engine.log("different phi types");
@@ -506,30 +507,30 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
   for (unsigned I = 0; I != NL+1; ++I) {
     Cur[I].Cost = I * LeftCost;
     for (unsigned J = 0; J != I; ++J)
-      Cur[I].Path.push_back(DifferenceEngine::DC_left);
+      Cur[I].Path.push_back(DC_left);
   }
 
   for (BasicBlock::iterator RI = RStart; RI != RE; ++RI) {
     // Initialize the first row.
     Next[0] = Cur[0];
     Next[0].Cost += RightCost;
-    Next[0].Path.push_back(DifferenceEngine::DC_right);
+    Next[0].Path.push_back(DC_right);
 
     unsigned Index = 1;
     for (BasicBlock::iterator LI = LStart; LI != LE; ++LI, ++Index) {
       if (matchForBlockDiff(&*LI, &*RI)) {
         Next[Index] = Cur[Index-1];
         Next[Index].Cost += MatchCost;
-        Next[Index].Path.push_back(DifferenceEngine::DC_match);
+        Next[Index].Path.push_back(DC_match);
         TentativeValues.insert(std::make_pair(&*LI, &*RI));
       } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
         Next[Index] = Next[Index-1];
         Next[Index].Cost += LeftCost;
-        Next[Index].Path.push_back(DifferenceEngine::DC_left);
+        Next[Index].Path.push_back(DC_left);
       } else {
         Next[Index] = Cur[Index];
         Next[Index].Cost += RightCost;
-        Next[Index].Path.push_back(DifferenceEngine::DC_right);
+        Next[Index].Path.push_back(DC_right);
       }
     }
 
@@ -543,23 +544,23 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
   SmallVectorImpl<char> &Path = Cur[NL].Path;
   BasicBlock::iterator LI = LStart, RI = RStart;
 
-  DifferenceEngine::DiffLogBuilder Diff(Engine);
+  DiffLogBuilder Diff(Engine.getConsumer());
 
   // Drop trailing matches.
-  while (Path.back() == DifferenceEngine::DC_match)
+  while (Path.back() == DC_match)
     Path.pop_back();
 
   // Skip leading matches.
   SmallVectorImpl<char>::iterator
     PI = Path.begin(), PE = Path.end();
-  while (PI != PE && *PI == DifferenceEngine::DC_match) {
+  while (PI != PE && *PI == DC_match) {
     unify(&*LI, &*RI);
     ++PI, ++LI, ++RI;
   }
 
   for (; PI != PE; ++PI) {
-    switch (static_cast<DifferenceEngine::DiffChange>(*PI)) {
-    case DifferenceEngine::DC_match:
+    switch (static_cast<DiffChange>(*PI)) {
+    case DC_match:
       assert(LI != LE && RI != RE);
       {
         Instruction *L = &*LI, *R = &*RI;
@@ -569,13 +570,13 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
       ++LI; ++RI;
       break;
 
-    case DifferenceEngine::DC_left:
+    case DC_left:
       assert(LI != LE);
       Diff.addLeft(&*LI);
       ++LI;
       break;
 
-    case DifferenceEngine::DC_right:
+    case DC_right:
       assert(RI != RE);
       Diff.addRight(&*RI);
       ++RI;
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 6eefb06..5b4f80b 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -17,6 +17,8 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "DiffLog.h"
+#include "DiffConsumer.h"
 
 #include <utility>
 
@@ -32,95 +34,6 @@ namespace llvm {
   /// A class for performing structural comparisons of LLVM assembly.
   class DifferenceEngine {
   public:
-    /// A temporary-object class for building up log messages.
-    class LogBuilder {
-      DifferenceEngine &Engine;
-
-      /// The use of a stored StringRef here is okay because
-      /// LogBuilder should be used only as a temporary, and as a
-      /// temporary it will be destructed before whatever temporary
-      /// might be initializing this format.
-      StringRef Format;
-
-      SmallVector<Value*, 4> Arguments;
-
-    public:
-      LogBuilder(DifferenceEngine &Engine, StringRef Format)
-        : Engine(Engine), Format(Format) {}
-
-      LogBuilder &operator<<(Value *V) {
-        Arguments.push_back(V);
-        return *this;
-      }
-
-      ~LogBuilder() {
-        Engine.consumer.logf(*this);
-      }
-
-      StringRef getFormat() const { return Format; }
-
-      unsigned getNumArguments() const { return Arguments.size(); }
-      Value *getArgument(unsigned I) const { return Arguments[I]; }
-    };
-
-    enum DiffChange { DC_match, DC_left, DC_right };
-
-    /// A temporary-object class for building up diff messages.
-    class DiffLogBuilder {
-      typedef std::pair<Instruction*,Instruction*> DiffRecord;
-      SmallVector<DiffRecord, 20> Diff;
-
-      DifferenceEngine &Engine;
-
-    public:
-      DiffLogBuilder(DifferenceEngine &Engine) : Engine(Engine) {}
-      ~DiffLogBuilder() { Engine.consumer.logd(*this); }
-
-      void addMatch(Instruction *L, Instruction *R) {
-        Diff.push_back(DiffRecord(L, R));
-      }
-      void addLeft(Instruction *L) {
-        // HACK: VS 2010 has a bug in the stdlib that requires this.
-        Diff.push_back(DiffRecord(L, DiffRecord::second_type(0)));
-      }
-      void addRight(Instruction *R) {
-        // HACK: VS 2010 has a bug in the stdlib that requires this.
-        Diff.push_back(DiffRecord(DiffRecord::first_type(0), R));
-      }
-
-      unsigned getNumLines() const { return Diff.size(); }
-      DiffChange getLineKind(unsigned I) const {
-        return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left)
-                              : DC_right);
-      }
-      Instruction *getLeft(unsigned I) const { return Diff[I].first; }
-      Instruction *getRight(unsigned I) const { return Diff[I].second; }
-    };
-
-    /// The interface for consumers of difference data.
-    struct Consumer {
-      /// Record that a local context has been entered.  Left and
-      /// Right are IR "containers" of some sort which are being
-      /// considered for structural equivalence: global variables,
-      /// functions, blocks, instructions, etc.
-      virtual void enterContext(Value *Left, Value *Right) = 0;
-
-      /// Record that a local context has been exited.
-      virtual void exitContext() = 0;
-
-      /// Record a difference within the current context.
-      virtual void log(StringRef Text) = 0;
-
-      /// Record a formatted difference within the current context.
-      virtual void logf(const LogBuilder &Log) = 0;
-
-      /// Record a line-by-line instruction diff.
-      virtual void logd(const DiffLogBuilder &Log) = 0;
-
-    protected:
-      virtual ~Consumer() {}
-    };
-
     /// A RAII object for recording the current context.
     struct Context {
       Context(DifferenceEngine &Engine, Value *L, Value *R) : Engine(Engine) {
@@ -149,14 +62,13 @@ namespace llvm {
 
     void diff(Module *L, Module *R);
     void diff(Function *L, Function *R);
-
     void log(StringRef text) {
       consumer.log(text);
     }
-
     LogBuilder logf(StringRef text) {
-      return LogBuilder(*this, text);
+      return LogBuilder(consumer, text);
     }
+    Consumer& getConsumer() const { return consumer; }
 
     /// Installs an oracle to decide whether two global values are
     /// equivalent as operands.  Without an oracle, global values are
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index b932ccc..76853f1 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "DiffLog.h"
 #include "DifferenceEngine.h"
 
-#include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
@@ -21,7 +21,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,221 +42,6 @@ static Module *ReadModule(LLVMContext &Context, StringRef Name) {
   return M;
 }
 
-namespace {
-  struct DiffContext {
-    DiffContext(Value *L, Value *R)
-      : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
-    Value *L;
-    Value *R;
-    bool Differences;
-    bool IsFunction;
-    DenseMap<Value*,unsigned> LNumbering;
-    DenseMap<Value*,unsigned> RNumbering;
-  };
-}
-
-static void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering){
-  unsigned IN = 0;
-
-  // Arguments get the first numbers.
-  for (Function::arg_iterator
-         AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
-    if (!AI->hasName())
-      Numbering[&*AI] = IN++;
-
-  // Walk the basic blocks in order.
-  for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
-    if (!FI->hasName())
-      Numbering[&*FI] = IN++;
-
-    // Walk the instructions in order.
-    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
-      // void instructions don't get numbers.
-      if (!BI->hasName() && !BI->getType()->isVoidTy())
-        Numbering[&*BI] = IN++;
-  }
-
-  assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
-}
-
-namespace {
-class DiffConsumer : public DifferenceEngine::Consumer {
-private:
-  raw_ostream &out;
-  Module *LModule;
-  Module *RModule;
-  SmallVector<DiffContext, 5> contexts;
-  bool Differences;
-  unsigned Indent;
-
-  void printValue(Value *V, bool isL) {
-    if (V->hasName()) {
-      out << (isa<GlobalValue>(V) ? '@' : '%') << V->getName();
-      return;
-    }
-    if (V->getType()->isVoidTy()) {
-      if (isa<StoreInst>(V)) {
-        out << "store to ";
-        printValue(cast<StoreInst>(V)->getPointerOperand(), isL);
-      } else if (isa<CallInst>(V)) {
-        out << "call to ";
-        printValue(cast<CallInst>(V)->getCalledValue(), isL);
-      } else if (isa<InvokeInst>(V)) {
-        out << "invoke to ";
-        printValue(cast<InvokeInst>(V)->getCalledValue(), isL);
-      } else {
-        out << *V;
-      }
-      return;
-    }
-
-    unsigned N = contexts.size();
-    while (N > 0) {
-      --N;
-      DiffContext &ctxt = contexts[N];
-      if (!ctxt.IsFunction) continue;
-      if (isL) {
-        if (ctxt.LNumbering.empty())
-          ComputeNumbering(cast<Function>(ctxt.L), ctxt.LNumbering);
-        out << '%' << ctxt.LNumbering[V];
-        return;
-      } else {
-        if (ctxt.RNumbering.empty())
-          ComputeNumbering(cast<Function>(ctxt.R), ctxt.RNumbering);
-        out << '%' << ctxt.RNumbering[V];
-        return;
-      }
-    }
-
-    out << "<anonymous>";
-  }
-
-  void header() {
-    if (contexts.empty()) return;
-    for (SmallVectorImpl<DiffContext>::iterator
-           I = contexts.begin(), E = contexts.end(); I != E; ++I) {
-      if (I->Differences) continue;
-      if (isa<Function>(I->L)) {
-        // Extra newline between functions.
-        if (Differences) out << "\n";
-
-        Function *L = cast<Function>(I->L);
-        Function *R = cast<Function>(I->R);
-        if (L->getName() != R->getName())
-          out << "in function " << L->getName()
-              << " / " << R->getName() << ":\n";
-        else
-          out << "in function " << L->getName() << ":\n";
-      } else if (isa<BasicBlock>(I->L)) {
-        BasicBlock *L = cast<BasicBlock>(I->L);
-        BasicBlock *R = cast<BasicBlock>(I->R);
-        if (L->hasName() && R->hasName() && L->getName() == R->getName())
-          out << "  in block %" << L->getName() << ":\n";
-        else {
-          out << "  in block ";
-          printValue(L, true);
-          out << " / ";
-          printValue(R, false);
-          out << ":\n";
-        }
-      } else if (isa<Instruction>(I->L)) {
-        out << "    in instruction ";
-        printValue(I->L, true);
-        out << " / ";
-        printValue(I->R, false);
-        out << ":\n";
-      }
-
-      I->Differences = true;
-    }
-  }
-
-  void indent() {
-    unsigned N = Indent;
-    while (N--) out << ' ';
-  }
-
-public:
-  DiffConsumer(Module *L, Module *R)
-    : out(errs()), LModule(L), RModule(R), Differences(false), Indent(0) {}
-
-  bool hadDifferences() const { return Differences; }
-
-  void enterContext(Value *L, Value *R) {
-    contexts.push_back(DiffContext(L, R));
-    Indent += 2;
-  }
-  void exitContext() {
-    Differences |= contexts.back().Differences;
-    contexts.pop_back();
-    Indent -= 2;
-  }
-
-  void log(StringRef text) {
-    header();
-    indent();
-    out << text << '\n';
-  }
-
-  void logf(const DifferenceEngine::LogBuilder &Log) {
-    header();
-    indent();
-
-    unsigned arg = 0;
-
-    StringRef format = Log.getFormat();
-    while (true) {
-      size_t percent = format.find('%');
-      if (percent == StringRef::npos) {
-        out << format;
-        break;
-      }
-      assert(format[percent] == '%');
-
-      if (percent > 0) out << format.substr(0, percent);
-
-      switch (format[percent+1]) {
-      case '%': out << '%'; break;
-      case 'l': printValue(Log.getArgument(arg++), true); break;
-      case 'r': printValue(Log.getArgument(arg++), false); break;
-      default: llvm_unreachable("unknown format character");
-      }
-
-      format = format.substr(percent+2);
-    }
-
-    out << '\n';
-  }
-
-  void logd(const DifferenceEngine::DiffLogBuilder &Log) {
-    header();
-
-    for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) {
-      indent();
-      switch (Log.getLineKind(I)) {
-      case DifferenceEngine::DC_match:
-        out << "  ";
-        Log.getLeft(I)->dump();
-        //printValue(Log.getLeft(I), true);
-        break;
-      case DifferenceEngine::DC_left:
-        out << "< ";
-        Log.getLeft(I)->dump();
-        //printValue(Log.getLeft(I), true);
-        break;
-      case DifferenceEngine::DC_right:
-        out << "> ";
-        Log.getRight(I)->dump();
-        //printValue(Log.getRight(I), false);
-        break;
-      }
-      //out << "\n";
-    }
-  }
-  
-};
-} // end anonymous namespace
-
 static void diffGlobal(DifferenceEngine &Engine, Module *L, Module *R,
                        StringRef Name) {
   // Drop leading sigils from the global name.
diff --git a/tools/llvm-dis/CMakeLists.txt b/tools/llvm-dis/CMakeLists.txt
index d62a6b5..3125f8a 100644
--- a/tools/llvm-dis/CMakeLists.txt
+++ b/tools/llvm-dis/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS bitreader)
+set(LLVM_LINK_COMPONENTS bitreader analysis)
 set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-dis
diff --git a/tools/llvm-dis/Makefile b/tools/llvm-dis/Makefile
index 22c9ecc..be71100 100644
--- a/tools/llvm-dis/Makefile
+++ b/tools/llvm-dis/Makefile
@@ -9,7 +9,7 @@
 LEVEL = ../..
 
 TOOLNAME = llvm-dis
-LINK_COMPONENTS := bitreader
+LINK_COMPONENTS := bitreader analysis
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index b4977ce..7453d9e 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -19,7 +19,9 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
@@ -50,6 +52,16 @@ ShowAnnotations("show-annotations",
 
 namespace {
 
+static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
+  OS << DL.getLine() << ":" << DL.getCol();
+  if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
+    DebugLoc IDL = DebugLoc::getFromDILocation(N);
+    if (!IDL.isUnknown()) {
+      OS << "@";
+      printDebugLoc(IDL,OS);
+    }
+  }
+}
 class CommentWriter : public AssemblyAnnotationWriter {
 public:
   void emitFunctionAnnot(const Function *F,
@@ -58,10 +70,43 @@ public:
     OS << '\n';
   }
   void printInfoComment(const Value &V, formatted_raw_ostream &OS) {
-    if (V.getType()->isVoidTy()) return;
-
-    OS.PadToColumn(50);
-    OS << "; [#uses=" << V.getNumUses() << ']';  // Output # uses
+    bool Padded = false;
+    if (!V.getType()->isVoidTy()) {
+      OS.PadToColumn(50);
+      Padded = true;
+      OS << "; [#uses=" << V.getNumUses() << " type=" << V.getType()->getDescription() << "]";  // Output # uses and type
+    }
+    if (const Instruction *I = dyn_cast<Instruction>(&V)) {
+      const DebugLoc &DL = I->getDebugLoc();
+      if (!DL.isUnknown()) {
+        if (!Padded) {
+          OS.PadToColumn(50);
+          Padded = true;
+          OS << ";";
+        }
+        OS << " [debug line = ";
+        printDebugLoc(DL,OS);
+        OS << "]";
+      }
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+        DIVariable Var(DDI->getVariable());
+        if (!Padded) {
+          OS.PadToColumn(50);
+          Padded = true;
+          OS << ";";
+        }
+        OS << " [debug variable = " << Var.getName() << "]";
+      }
+      else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+        DIVariable Var(DVI->getVariable());
+        if (!Padded) {
+          OS.PadToColumn(50);
+          Padded = true;
+          OS << ";";
+        }
+        OS << " [debug variable = " << Var.getName() << "]";
+      }
+    }
   }
 };
 
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index cd6ce25..6b4c3c7 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -552,12 +552,12 @@ int main(int argc, char **argv, char **envp) {
   }
 
   // Arrange for the bitcode output file to be deleted on any errors.
-  BitcodeOutputRemover.setFile(sys::Path(BitcodeOutputFilename));
+  BitcodeOutputRemover.setFile(BitcodeOutputFilename);
   sys::RemoveFileOnSignal(sys::Path(BitcodeOutputFilename));
 
   // Arrange for the output file to be deleted on any errors.
   if (!LinkAsLibrary) {
-    OutputRemover.setFile(sys::Path(OutputFilename));
+    OutputRemover.setFile(OutputFilename);
     sys::RemoveFileOnSignal(sys::Path(OutputFilename));
   }
 
@@ -657,7 +657,7 @@ int main(int argc, char **argv, char **envp) {
       AssemblyFile.appendSuffix("s");
 
       // Mark the output files for removal.
-      FileRemover AssemblyFileRemover(AssemblyFile);
+      FileRemover AssemblyFileRemover(AssemblyFile.str());
       sys::RemoveFileOnSignal(AssemblyFile);
 
       // Determine the locations of the llc and gcc programs.
@@ -684,7 +684,7 @@ int main(int argc, char **argv, char **envp) {
       CFile.appendSuffix("cbe.c");
 
       // Mark the output files for removal.
-      FileRemover CFileRemover(CFile);
+      FileRemover CFileRemover(CFile.str());
       sys::RemoveFileOnSignal(CFile);
 
       // Determine the locations of the llc and gcc programs.
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index d98b57e..cdd1892 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -127,7 +127,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
   return false;
 }
 
-int Disassembler::disassemble(const Target &T, const std::string &Triple,
+int Disassembler::disassemble(const Target &T,  TargetMachine &TM,
+                              const std::string &Triple,
                               MemoryBuffer &Buffer,
                               raw_ostream &Out) {
   // Set up disassembler.
@@ -145,7 +146,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
   }
   
   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
-  OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(AsmPrinterVariant,
+  OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(TM, AsmPrinterVariant,
                                                     *AsmInfo));
   if (!IP) {
     errs() << "error: no instruction printer for target " << Triple << '\n';
@@ -239,12 +240,12 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
     OwningPtr<EDInst>
       inst(disassembler->createInst(byteArrayReader, 0, &ByteArray));
   
-    ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
-                               
     if (inst == 0) {
       errs() << "error: Didn't get an instruction\n";
       return -1;
     }
+
+    ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
     
     unsigned numTokens = inst->numTokens();
     if ((int)numTokens < 0) {
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index b56f2e9..aaf77b5 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -19,13 +19,15 @@
 
 namespace llvm {
 
-class Target;
 class MemoryBuffer;
+class Target;
+class TargetMachine;
 class raw_ostream;
 
 class Disassembler {
 public:
-  static int disassemble(const Target &target, 
+  static int disassemble(const Target &target,
+                         TargetMachine &TM,
                          const std::string &tripleString,
                          MemoryBuffer &buffer,
                          raw_ostream &Out);
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 2c22bed..24cc263 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -113,6 +113,10 @@ static cl::opt<bool>
 NoInitialTextSection("n", cl::desc(
                    "Don't assume assembly file starts in the text section"));
 
+static cl::opt<bool>
+SaveTempLabels("L", cl::desc(
+                 "Don't discard temporary labels"));
+
 enum ActionType {
   AC_AsLex,
   AC_Assemble,
@@ -327,6 +331,8 @@ static int AssembleInput(const char *ProgName) {
 
   const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
   MCContext Ctx(*MAI, tai);
+  if (SaveTempLabels)
+    Ctx.setAllowTemporaryLabels(false);
 
   OwningPtr<tool_output_file> Out(GetOutputStream());
   if (!Out)
@@ -342,7 +348,7 @@ static int AssembleInput(const char *ProgName) {
   // FIXME: There is a bit of code duplication with addPassesToEmitFile.
   if (FileType == OFT_AssemblyFile) {
     MCInstPrinter *IP =
-      TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI);
+      TheTarget->createMCInstPrinter(*TM, OutputAsmVariant, *MAI);
     MCCodeEmitter *CE = 0;
     TargetAsmBackend *TAB = 0;
     if (ShowEncoding) {
@@ -350,7 +356,8 @@ static int AssembleInput(const char *ProgName) {
       TAB = TheTarget->createAsmBackend(TripleName);
     }
     Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
-                                           /*useLoc*/ true, IP, CE, TAB,
+                                           /*useLoc*/ true,
+                                           /*useCFI*/ true, IP, CE, TAB,
                                            ShowInst));
   } else if (FileType == OFT_Null) {
     Str.reset(createNullStreamer(Ctx));
@@ -403,12 +410,34 @@ static int DisassembleInput(const char *ProgName, bool Enhanced) {
     return 1;
 
   int Res;
-  if (Enhanced)
+  if (Enhanced) {
     Res =
       Disassembler::disassembleEnhanced(TripleName, *Buffer.take(), Out->os());
-  else
-    Res = Disassembler::disassemble(*TheTarget, TripleName,
+  } else {
+    // Package up features to be passed to target/subtarget
+    std::string FeaturesStr;
+    if (MCPU.size()) {
+      SubtargetFeatures Features;
+      Features.setCPU(MCPU);
+      FeaturesStr = Features.getString();
+    }
+
+    // FIXME: We shouldn't need to do this (and link in codegen).
+    //        When we split this out, we should do it in a way that makes
+    //        it straightforward to switch subtargets on the fly (.e.g,
+    //        the .cpu and .code16 directives).
+    OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
+                                                               FeaturesStr));
+
+    if (!TM) {
+      errs() << ProgName << ": error: could not create target for triple '"
+             << TripleName << "'.\n";
+      return 1;
+    }
+
+    Res = Disassembler::disassemble(*TheTarget, *TM, TripleName,
                                     *Buffer.take(), Out->os());
+  }
 
   // Keep output if no errors.
   if (Res == 0) Out->keep();
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 1fef8b6..a17624a 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -38,13 +38,13 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetSelect.h"
 #include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <cstring>
-#include <vector>
 using namespace llvm;
 using namespace object;
 
@@ -182,9 +182,21 @@ static void DisassembleInput(const StringRef &Filename) {
       return;
     }
 
+    // FIXME: We shouldn't need to do this (and link in codegen).
+    //        When we split this out, we should do it in a way that makes
+    //        it straightforward to switch subtargets on the fly (.e.g,
+    //        the .cpu and .code16 directives).
+    std::string FeaturesStr;
+    OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
+                                                               FeaturesStr));
+    if (!TM) {
+      errs() << "error: could not create target for triple " << TripleName << "\n";
+      return;
+    }
+
     int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
     OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
-                                  AsmPrinterVariant, *AsmInfo));
+                                  *TM, AsmPrinterVariant, *AsmInfo));
     if (!IP) {
       errs() << "error: no instruction printer for target " << TripleName << '\n';
       return;
diff --git a/tools/llvm-rtdyld/CMakeLists.txt b/tools/llvm-rtdyld/CMakeLists.txt
new file mode 100644
index 0000000..17e2c3e
--- /dev/null
+++ b/tools/llvm-rtdyld/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC object RuntimeDyld JIT)
+
+add_llvm_tool(llvm-rtdyld
+  llvm-rtdyld.cpp
+  )
diff --git a/tools/llvm-rtdyld/Makefile b/tools/llvm-rtdyld/Makefile
new file mode 100644
index 0000000..0d57277
--- /dev/null
+++ b/tools/llvm-rtdyld/Makefile
@@ -0,0 +1,23 @@
+##===- tools/llvm-rtdyld/Makefile --------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = llvm-rtdyld
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Include this here so we can get the configuration of the targets
+# that have been configured for construction. We have to do this
+# early so we can set up LINK_COMPONENTS before including Makefile.rules
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) support MC object RuntimeDyld JIT
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
new file mode 100644
index 0000000..ec9d652
--- /dev/null
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -0,0 +1,151 @@
+//===-- llvm-rtdyld.cpp - MCJIT Testing Tool ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a testing tool for use with the MC-JIT LLVM components.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+using namespace llvm::object;
+
+static cl::list<std::string>
+InputFileList(cl::Positional, cl::ZeroOrMore,
+              cl::desc("<input file>"));
+
+enum ActionType {
+  AC_Execute
+};
+
+static cl::opt<ActionType>
+Action(cl::desc("Action to perform:"),
+       cl::init(AC_Execute),
+       cl::values(clEnumValN(AC_Execute, "execute",
+                             "Load, link, and execute the inputs."),
+                  clEnumValEnd));
+
+static cl::opt<std::string>
+EntryPoint("entry",
+           cl::desc("Function to call as entry point."),
+           cl::init("_main"));
+
+/* *** */
+
+// A trivial memory manager that doesn't do anything fancy, just uses the
+// support library allocation routines directly.
+class TrivialMemoryManager : public RTDyldMemoryManager {
+public:
+  SmallVector<sys::MemoryBlock, 16> FunctionMemory;
+
+  uint8_t *startFunctionBody(const char *Name, uintptr_t &Size);
+  void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+                       uint8_t *FunctionEnd);
+};
+
+uint8_t *TrivialMemoryManager::startFunctionBody(const char *Name,
+                                                 uintptr_t &Size) {
+  return (uint8_t*)sys::Memory::AllocateRWX(Size, 0, 0).base();
+}
+
+void TrivialMemoryManager::endFunctionBody(const char *Name,
+                                           uint8_t *FunctionStart,
+                                           uint8_t *FunctionEnd) {
+  uintptr_t Size = FunctionEnd - FunctionStart + 1;
+  FunctionMemory.push_back(sys::MemoryBlock(FunctionStart, Size));
+}
+
+static const char *ProgramName;
+
+static void Message(const char *Type, const Twine &Msg) {
+  errs() << ProgramName << ": " << Type << ": " << Msg << "\n";
+}
+
+static int Error(const Twine &Msg) {
+  Message("error", Msg);
+  return 1;
+}
+
+/* *** */
+
+static int executeInput() {
+  // Instantiate a dynamic linker.
+  TrivialMemoryManager *MemMgr = new TrivialMemoryManager;
+  RuntimeDyld Dyld(MemMgr);
+
+  // If we don't have any input files, read from stdin.
+  if (!InputFileList.size())
+    InputFileList.push_back("-");
+  for(unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
+    // Load the input memory buffer.
+    OwningPtr<MemoryBuffer> InputBuffer;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFileList[i],
+                                                     InputBuffer))
+      return Error("unable to read input: '" + ec.message() + "'");
+
+    // Load the object file into it.
+    if (Dyld.loadObject(InputBuffer.take())) {
+      return Error(Dyld.getErrorString());
+    }
+  }
+
+  // Resolve all the relocations we can.
+  Dyld.resolveRelocations();
+
+  // FIXME: Error out if there are unresolved relocations.
+
+  // Get the address of the entry point (_main by default).
+  void *MainAddress = Dyld.getSymbolAddress(EntryPoint);
+  if (MainAddress == 0)
+    return Error("no definition for '" + EntryPoint + "'");
+
+  // Invalidate the instruction cache for each loaded function.
+  for (unsigned i = 0, e = MemMgr->FunctionMemory.size(); i != e; ++i) {
+    sys::MemoryBlock &Data = MemMgr->FunctionMemory[i];
+    // Make sure the memory is executable.
+    std::string ErrorStr;
+    sys::Memory::InvalidateInstructionCache(Data.base(), Data.size());
+    if (!sys::Memory::setExecutable(Data, &ErrorStr))
+      return Error("unable to mark function executable: '" + ErrorStr + "'");
+  }
+
+  // Dispatch to _main().
+  errs() << "loaded '" << EntryPoint << "' at: " << (void*)MainAddress << "\n";
+
+  int (*Main)(int, const char**) =
+    (int(*)(int,const char**)) uintptr_t(MainAddress);
+  const char **Argv = new const char*[2];
+  // Use the name of the first input object module as argv[0] for the target.
+  Argv[0] = InputFileList[0].c_str();
+  Argv[1] = 0;
+  return Main(1, Argv);
+}
+
+int main(int argc, char **argv) {
+  ProgramName = argv[0];
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm MC-JIT tool\n");
+
+  switch (Action) {
+  default:
+  case AC_Execute:
+    return executeInput();
+  }
+
+  return 0;
+}
diff --git a/tools/llvm-stub/llvm-stub.c b/tools/llvm-stub/llvm-stub.c
index 31c2d09..69cd6ed 100644
--- a/tools/llvm-stub/llvm-stub.c
+++ b/tools/llvm-stub/llvm-stub.c
@@ -64,7 +64,7 @@ int main(int argc, char** argv) {
   memcpy((char **)Args+2, argv+1, sizeof(char*)*argc);
 
   /* Run the JIT. */
-#ifndef _WIN32
+#if !defined(_WIN32) || defined(__MINGW64__)
   execvp(Interp, (char **)Args); /* POSIX execvp takes a char *const[]. */
 #else
   execvp(Interp, Args); /* windows execvp takes a const char *const *. */
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index ec9098b..041aedf 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -18,17 +18,16 @@ Introduction
 ============
 
 LLVMC is a generic compiler driver, designed to be customizable and
-extensible. It plays the same role for LLVM as the ``gcc`` program
-does for GCC - LLVMC's job is essentially to transform a set of input
-files into a set of targets depending on configuration rules and user
-options. What makes LLVMC different is that these transformation rules
-are completely customizable - in fact, LLVMC knows nothing about the
-specifics of transformation (even the command-line options are mostly
-not hard-coded) and regards the transformation structure as an
-abstract graph. The structure of this graph is completely determined
-by plugins, which can be either statically or dynamically linked. This
-makes it possible to easily adapt LLVMC for other purposes - for
-example, as a build tool for game resources.
+extensible. It plays the same role for LLVM as the ``gcc`` program does for
+GCC - LLVMC's job is essentially to transform a set of input files into a set of
+targets depending on configuration rules and user options. What makes LLVMC
+different is that these transformation rules are completely customizable - in
+fact, LLVMC knows nothing about the specifics of transformation (even the
+command-line options are mostly not hard-coded) and regards the transformation
+structure as an abstract graph. The structure of this graph is described in
+high-level TableGen code, from which an efficient C++ representation is
+automatically derived. This makes it possible to adapt LLVMC for other
+purposes - for example, as a build tool for game resources.
 
 Because LLVMC employs TableGen_ as its configuration language, you
 need to be familiar with it to customize LLVMC.
@@ -36,8 +35,8 @@ need to be familiar with it to customize LLVMC.
 .. _TableGen: http://llvm.org/docs/TableGenFundamentals.html
 
 
-Compiling with LLVMC
-====================
+Compiling with ``llvmc``
+========================
 
 LLVMC tries hard to be as compatible with ``gcc`` as possible,
 although there are some small differences. Most of the time, however,
@@ -78,17 +77,13 @@ possible to choose the ``clang`` compiler with the ``-clang`` option.
 Predefined options
 ==================
 
-LLVMC has some built-in options that can't be overridden in the
-configuration libraries:
+LLVMC has some built-in options that can't be overridden in the TableGen code:
 
 * ``-o FILE`` - Output file name.
 
 * ``-x LANGUAGE`` - Specify the language of the following input files
   until the next -x option.
 
-* ``-load PLUGIN_NAME`` - Load the specified plugin DLL. Example:
-  ``-load $LLVM_DIR/Release/lib/LLVMCSimple.so``.
-
 * ``-v`` - Enable verbose mode, i.e. print out all executed commands.
 
 * ``--save-temps`` - Write temporary files to the current directory and do not
@@ -103,124 +98,90 @@ configuration libraries:
   precedence.
 
 * ``--check-graph`` - Check the compilation for common errors like mismatched
-  output/input language names, multiple default edges and cycles. Because of
-  plugins, these checks can't be performed at compile-time. Exit with code zero
-  if no errors were found, and return the number of found errors
-  otherwise. Hidden option, useful for debugging LLVMC plugins.
+  output/input language names, multiple default edges and cycles. Exit with code
+  zero if no errors were found, and return the number of found errors
+  otherwise. Hidden option, useful for debugging.
 
 * ``--view-graph`` - Show a graphical representation of the compilation graph
   and exit. Requires that you have ``dot`` and ``gv`` programs installed. Hidden
-  option, useful for debugging LLVMC plugins.
+  option, useful for debugging.
 
 * ``--write-graph`` - Write a ``compilation-graph.dot`` file in the current
   directory with the compilation graph description in Graphviz format (identical
   to the file used by the ``--view-graph`` option). The ``-o`` option can be
-  used to set the output file name. Hidden option, useful for debugging LLVMC
-  plugins.
+  used to set the output file name. Hidden option, useful for debugging.
 
 * ``--help``, ``--help-hidden``, ``--version`` - These options have
   their standard meaning.
 
-Compiling LLVMC plugins
-=======================
+Compiling LLVMC-based drivers
+=============================
 
-It's easiest to start working on your own LLVMC plugin by copying the
-skeleton project which lives under ``$LLVMC_DIR/plugins/Simple``::
+It's easiest to start working on your own LLVMC driver by copying the skeleton
+project which lives under ``$LLVMC_DIR/examples/Skeleton``::
 
-   $ cd $LLVMC_DIR/plugins
-   $ cp -r Simple MyPlugin
-   $ cd MyPlugin
+   $ cd $LLVMC_DIR/examples
+   $ cp -r Skeleton MyDriver
+   $ cd MyDriver
    $ ls
-   Makefile PluginMain.cpp Simple.td
+   AutoGenerated.td  Hooks.cpp  Main.cpp  Makefile
 
-As you can see, our basic plugin consists of only two files (not
-counting the build script). ``Simple.td`` contains TableGen
-description of the compilation graph; its format is documented in the
-following sections. ``PluginMain.cpp`` is just a helper file used to
-compile the auto-generated C++ code produced from TableGen source. It
-can also contain hook definitions (see `below`__).
+As you can see, our basic driver consists of only three files (not counting the
+build script). ``AutoGenerated.td`` contains TableGen description of the
+compilation graph; its format is documented in the following
+sections. ``Hooks.cpp`` is an empty file that should be used for hook
+definitions (see `below`__). ``Main.cpp`` is just a helper used to compile the
+auto-generated C++ code produced from TableGen source.
 
 __ hooks_
 
-The first thing that you should do is to change the ``LLVMC_PLUGIN``
-variable in the ``Makefile`` to avoid conflicts (since this variable
-is used to name the resulting library)::
-
-   LLVMC_PLUGIN=MyPlugin
-
-It is also a good idea to rename ``Simple.td`` to something less
-generic::
+The first thing that you should do is to change the ``LLVMC_BASED_DRIVER``
+variable in the ``Makefile``::
 
-   $ mv Simple.td MyPlugin.td
+   LLVMC_BASED_DRIVER=MyDriver
 
-To build your plugin as a dynamic library, just ``cd`` to its source
-directory and run ``make``. The resulting file will be called
-``plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION)`` (in our case,
-``plugin_llvmc_MyPlugin.so``). This library can be then loaded in with the
-``-load`` option. Example::
+It can also be a good idea to put your TableGen code into a file with a less
+generic name::
 
-    $ cd $LLVMC_DIR/plugins/Simple
-    $ make
-    $ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
+   $ touch MyDriver.td
+   $ vim AutoGenerated.td
+   [...]
+   include "MyDriver.td"
 
-Compiling standalone LLVMC-based drivers
-========================================
+If you have more than one TableGen source file, they all should be included from
+``AutoGenerated.td``, since this file is used by the build system to generate
+C++ code.
 
-By default, the ``llvmc`` executable consists of a driver core plus several
-statically linked plugins (``Base`` and ``Clang`` at the moment). You can
-produce a standalone LLVMC-based driver executable by linking the core with your
-own plugins. The recommended way to do this is by starting with the provided
-``Skeleton`` example (``$LLVMC_DIR/example/Skeleton``)::
-
-    $ cd $LLVMC_DIR/example/
-    $ cp -r Skeleton mydriver
-    $ cd mydriver
-    $ vim Makefile
-    [...]
-    $ make
+To build your driver, just ``cd`` to its source directory and run ``make``. The
+resulting executable will be put into ``$LLVM_OBJ_DIR/$(BuildMode)/bin``.
 
 If you're compiling LLVM with different source and object directories, then you
 must perform the following additional steps before running ``make``::
 
     # LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
     # LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
-    $ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
-      $LLVMC_OBJ_DIR/example/mydriver/
-    $ cd $LLVMC_OBJ_DIR/example/mydriver
+    $ mkdir $LLVMC_OBJ_DIR/examples/MyDriver/
+    $ cp $LLVMC_SRC_DIR/examples/MyDriver/Makefile \
+      $LLVMC_OBJ_DIR/examples/MyDriver/
+    $ cd $LLVMC_OBJ_DIR/examples/MyDriver
     $ make
 
-Another way to do the same thing is by using the following command::
-
-    $ cd $LLVMC_DIR
-    $ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
-
-This works with both srcdir == objdir and srcdir != objdir, but assumes that the
-plugin source directory was placed under ``$LLVMC_DIR/plugins``.
-
-Sometimes, you will want a 'bare-bones' version of LLVMC that has no
-built-in plugins. It can be compiled with the following command::
-
-    $ cd $LLVMC_DIR
-    $ make LLVMC_BUILTIN_PLUGINS=""
-
 
 Customizing LLVMC: the compilation graph
 ========================================
 
-Each TableGen configuration file should include the common
-definitions::
+Each TableGen configuration file should include the common definitions::
 
    include "llvm/CompilerDriver/Common.td"
 
-Internally, LLVMC stores information about possible source
-transformations in form of a graph. Nodes in this graph represent
-tools, and edges between two nodes represent a transformation path. A
-special "root" node is used to mark entry points for the
-transformations. LLVMC also assigns a weight to each edge (more on
-this later) to choose between several alternative edges.
+Internally, LLVMC stores information about possible source transformations in
+form of a graph. Nodes in this graph represent tools, and edges between two
+nodes represent a transformation path. A special "root" node is used to mark
+entry points for the transformations. LLVMC also assigns a weight to each edge
+(more on this later) to choose between several alternative edges.
 
-The definition of the compilation graph (see file
-``plugins/Base/Base.td`` for an example) is just a list of edges::
+The definition of the compilation graph (see file ``llvmc/src/Base.td`` for an
+example) is just a list of edges::
 
     def CompilationGraph : CompilationGraph<[
         Edge<"root", "llvm_gcc_c">,
@@ -245,43 +206,37 @@ The definition of the compilation graph (see file
 
         ]>;
 
-As you can see, the edges can be either default or optional, where
-optional edges are differentiated by an additional ``case`` expression
-used to calculate the weight of this edge. Notice also that we refer
-to tools via their names (as strings). This makes it possible to add
-edges to an existing compilation graph in plugins without having to
-know about all tool definitions used in the graph.
-
-The default edges are assigned a weight of 1, and optional edges get a
-weight of 0 + 2*N where N is the number of tests that evaluated to
-true in the ``case`` expression. It is also possible to provide an
-integer parameter to ``inc_weight`` and ``dec_weight`` - in this case,
-the weight is increased (or decreased) by the provided value instead
-of the default 2. It is also possible to change the default weight of
-an optional edge by using the ``default`` clause of the ``case``
+As you can see, the edges can be either default or optional, where optional
+edges are differentiated by an additional ``case`` expression used to calculate
+the weight of this edge. Notice also that we refer to tools via their names (as
+strings). This makes it possible to add edges to an existing compilation graph
+without having to know about all tool definitions used in the graph.
+
+The default edges are assigned a weight of 1, and optional edges get a weight of
+0 + 2*N where N is the number of tests that evaluated to true in the ``case``
+expression. It is also possible to provide an integer parameter to
+``inc_weight`` and ``dec_weight`` - in this case, the weight is increased (or
+decreased) by the provided value instead of the default 2. Default weight of an
+optional edge can be changed by using the ``default`` clause of the ``case``
 construct.
 
-When passing an input file through the graph, LLVMC picks the edge
-with the maximum weight. To avoid ambiguity, there should be only one
-default edge between two nodes (with the exception of the root node,
-which gets a special treatment - there you are allowed to specify one
-default edge *per language*).
+When passing an input file through the graph, LLVMC picks the edge with the
+maximum weight. To avoid ambiguity, there should be only one default edge
+between two nodes (with the exception of the root node, which gets a special
+treatment - there you are allowed to specify one default edge *per language*).
 
-When multiple plugins are loaded, their compilation graphs are merged
-together. Since multiple edges that have the same end nodes are not
-allowed (i.e. the graph is not a multigraph), an edge defined in
-several plugins will be replaced by the definition from the plugin
-that was loaded last. Plugin load order can be controlled by using the
-plugin priority feature described above.
+When multiple compilation graphs are defined, they are merged together. Multiple
+edges with the same end nodes are not allowed (i.e. the graph is not a
+multigraph), and will lead to a compile-time error.
 
-To get a visual representation of the compilation graph (useful for
-debugging), run ``llvmc --view-graph``. You will need ``dot`` and
-``gsview`` installed for this to work properly.
+To get a visual representation of the compilation graph (useful for debugging),
+run ``llvmc --view-graph``. You will need ``dot`` and ``gsview`` installed for
+this to work properly.
 
 Describing options
 ==================
 
-Command-line options that the plugin supports are defined by using an
+Command-line options supported by the driver are defined by using an
 ``OptionList``::
 
     def Options : OptionList<[
@@ -290,11 +245,10 @@ Command-line options that the plugin supports are defined by using an
     ...
     ]>;
 
-As you can see, the option list is just a list of DAGs, where each DAG
-is an option description consisting of the option name and some
-properties. A plugin can define more than one option list (they are
-all merged together in the end), which can be handy if one wants to
-separate option groups syntactically.
+As you can see, the option list is just a list of DAGs, where each DAG is an
+option description consisting of the option name and some properties. More than
+one option list can be defined (they are all merged together in the end), which
+can be handy if one wants to separate option groups syntactically.
 
 * Possible option types:
 
@@ -306,7 +260,7 @@ separate option groups syntactically.
      sign: ``-std c99``. At most one occurrence is allowed.
 
    - ``parameter_list_option`` - same as the above, but more than one option
-     occurence is allowed.
+     occurrence is allowed.
 
    - ``prefix_option`` - same as the parameter_option, but the option name and
      argument do not have to be separated. Example: ``-ofile``. This can be also
@@ -314,7 +268,7 @@ separate option groups syntactically.
      (``=file`` will be interpreted as option value). At most one occurrence is
      allowed.
 
-   - ``prefix_list_option`` - same as the above, but more than one occurence of
+   - ``prefix_list_option`` - same as the above, but more than one occurrence of
      the option is allowed; example: ``-lm -lpthread``.
 
    - ``alias_option`` - a special option type for creating aliases. Unlike other
@@ -380,42 +334,17 @@ separate option groups syntactically.
      Usage examples: ``(switch_option "foo", (init true))``; ``(prefix_option
      "bar", (init "baz"))``.
 
-   - ``extern`` - this option is defined in some other plugin, see `below`__.
-
-   __ extern_
-
-.. _extern:
-
-External options
-----------------
-
-Sometimes, when linking several plugins together, one plugin needs to
-access options defined in some other plugin. Because of the way
-options are implemented, such options must be marked as
-``extern``. This is what the ``extern`` option property is
-for. Example::
-
-     ...
-     (switch_option "E", (extern))
-     ...
-
-If an external option has additional attributes besides 'extern', they are
-ignored. See also the section on plugin `priorities`__.
-
-__ priorities_
-
 .. _case:
 
 Conditional evaluation
 ======================
 
-The 'case' construct is the main means by which programmability is
-achieved in LLVMC. It can be used to calculate edge weights, program
-actions and modify the shell commands to be executed. The 'case'
-expression is designed after the similarly-named construct in
-functional languages and takes the form ``(case (test_1), statement_1,
-(test_2), statement_2, ... (test_N), statement_N)``. The statements
-are evaluated only if the corresponding tests evaluate to true.
+The 'case' construct is the main means by which programmability is achieved in
+LLVMC. It can be used to calculate edge weights, program actions and modify the
+shell commands to be executed. The 'case' expression is designed after the
+similarly-named construct in functional languages and takes the form ``(case
+(test_1), statement_1, (test_2), statement_2, ... (test_N), statement_N)``. The
+statements are evaluated only if the corresponding tests evaluate to true.
 
 Examples::
 
@@ -439,20 +368,19 @@ Examples::
         (switch_on "B"), "cmdline2",
         (default), "cmdline3")
 
-Note the slight difference in 'case' expression handling in contexts
-of edge weights and command line specification - in the second example
-the value of the ``"B"`` switch is never checked when switch ``"A"`` is
-enabled, and the whole expression always evaluates to ``"cmdline1"`` in
-that case.
+Note the slight difference in 'case' expression handling in contexts of edge
+weights and command line specification - in the second example the value of the
+``"B"`` switch is never checked when switch ``"A"`` is enabled, and the whole
+expression always evaluates to ``"cmdline1"`` in that case.
 
 Case expressions can also be nested, i.e. the following is legal::
 
     (case (switch_on "E"), (case (switch_on "o"), ..., (default), ...)
           (default), ...)
 
-You should, however, try to avoid doing that because it hurts
-readability. It is usually better to split tool descriptions and/or
-use TableGen inheritance instead.
+You should, however, try to avoid doing that because it hurts readability. It is
+usually better to split tool descriptions and/or use TableGen inheritance
+instead.
 
 * Possible tests are:
 
@@ -526,72 +454,75 @@ use TableGen inheritance instead.
     Example: ``(not (or (test1), (test2), ... (testN)))``.
 
 
-
 Writing a tool description
 ==========================
 
-As was said earlier, nodes in the compilation graph represent tools,
-which are described separately. A tool definition looks like this
-(taken from the ``include/llvm/CompilerDriver/Tools.td`` file)::
+As was said earlier, nodes in the compilation graph represent tools, which are
+described separately. A tool definition looks like this (taken from the
+``llvmc/src/Base.td`` file)::
 
   def llvm_gcc_cpp : Tool<[
       (in_language "c++"),
       (out_language "llvm-assembler"),
       (output_suffix "bc"),
-      (cmd_line "llvm-g++ -c $INFILE -o $OUTFILE -emit-llvm"),
+      (command "llvm-g++ -c -emit-llvm"),
       (sink)
       ]>;
 
 This defines a new tool called ``llvm_gcc_cpp``, which is an alias for
-``llvm-g++``. As you can see, a tool definition is just a list of
-properties; most of them should be self-explanatory. The ``sink``
-property means that this tool should be passed all command-line
-options that aren't mentioned in the option list.
+``llvm-g++``. As you can see, a tool definition is just a list of properties;
+most of them should be self-explanatory. The ``sink`` property means that this
+tool should be passed all command-line options that aren't mentioned in the
+option list.
 
 The complete list of all currently implemented tool properties follows.
 
 * Possible tool properties:
 
   - ``in_language`` - input language name. Can be given multiple arguments, in
-    case the tool supports multiple input languages.
+    case the tool supports multiple input languages. Used for typechecking and
+    mapping file extensions to tools.
 
   - ``out_language`` - output language name. Multiple output languages are
-    allowed.
+    allowed. Used for typechecking the compilation graph.
 
-  - ``output_suffix`` - output file suffix. Can also be changed
-    dynamically, see documentation on actions.
+  - ``output_suffix`` - output file suffix. Can also be changed dynamically, see
+    documentation on `actions`__.
+
+__ actions_
 
-  - ``cmd_line`` - the actual command used to run the tool. You can
-    use ``$INFILE`` and ``$OUTFILE`` variables, output redirection
-    with ``>``, hook invocations (``$CALL``), environment variables
+  - ``command`` - the actual command used to run the tool. You can use output
+    redirection with ``>``, hook invocations (``$CALL``), environment variables
     (via ``$ENV``) and the ``case`` construct.
 
-  - ``join`` - this tool is a "join node" in the graph, i.e. it gets a
-    list of input files and joins them together. Used for linkers.
+  - ``join`` - this tool is a "join node" in the graph, i.e. it gets a list of
+    input files and joins them together. Used for linkers.
 
-  - ``sink`` - all command-line options that are not handled by other
-    tools are passed to this tool.
+  - ``sink`` - all command-line options that are not handled by other tools are
+    passed to this tool.
 
-  - ``actions`` - A single big ``case`` expression that specifies how
-    this tool reacts on command-line options (described in more detail
-    `below`__).
+  - ``actions`` - A single big ``case`` expression that specifies how this tool
+    reacts on command-line options (described in more detail `below`__).
 
 __ actions_
 
+  - ``out_file_option``, ``in_file_option`` - Options appended to the
+    ``command`` string to designate output and input files. Default values are
+    ``"-o"`` and ``""``, respectively.
+
 .. _actions:
 
 Actions
 -------
 
-A tool often needs to react to command-line options, and this is
-precisely what the ``actions`` property is for. The next example
-illustrates this feature::
+A tool often needs to react to command-line options, and this is precisely what
+the ``actions`` property is for. The next example illustrates this feature::
 
   def llvm_gcc_linker : Tool<[
       (in_language "object-code"),
       (out_language "executable"),
       (output_suffix "out"),
-      (cmd_line "llvm-gcc $INFILE -o $OUTFILE"),
+      (command "llvm-gcc"),
       (join),
       (actions (case (not_empty "L"), (forward "L"),
                      (not_empty "l"), (forward "l"),
@@ -599,18 +530,17 @@ illustrates this feature::
                                [(append_cmd "-dummy1"), (append_cmd "-dummy2")])
       ]>;
 
-The ``actions`` tool property is implemented on top of the omnipresent
-``case`` expression. It associates one or more different *actions*
-with given conditions - in the example, the actions are ``forward``,
-which forwards a given option unchanged, and ``append_cmd``, which
-appends a given string to the tool execution command. Multiple actions
-can be associated with a single condition by using a list of actions
-(used in the example to append some dummy options). The same ``case``
-construct can also be used in the ``cmd_line`` property to modify the
-tool command line.
+The ``actions`` tool property is implemented on top of the omnipresent ``case``
+expression. It associates one or more different *actions* with given
+conditions - in the example, the actions are ``forward``, which forwards a given
+option unchanged, and ``append_cmd``, which appends a given string to the tool
+execution command. Multiple actions can be associated with a single condition by
+using a list of actions (used in the example to append some dummy options). The
+same ``case`` construct can also be used in the ``cmd_line`` property to modify
+the tool command line.
 
-The "join" property used in the example means that this tool behaves
-like a linker.
+The "join" property used in the example means that this tool behaves like a
+linker.
 
 The list of all possible actions follows.
 
@@ -656,10 +586,10 @@ The list of all possible actions follows.
 Language map
 ============
 
-If you are adding support for a new language to LLVMC, you'll need to
-modify the language map, which defines mappings from file extensions
-to language names. It is used to choose the proper toolchain(s) for a
-given input file set. Language map definition looks like this::
+If you are adding support for a new language to LLVMC, you'll need to modify the
+language map, which defines mappings from file extensions to language names. It
+is used to choose the proper toolchain(s) for a given input file set. Language
+map definition looks like this::
 
     def LanguageMap : LanguageMap<
         [LangToSuffixes<"c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]>,
@@ -673,9 +603,7 @@ For example, without those definitions the following command wouldn't work::
     llvmc: Unknown suffix: cpp
 
 The language map entries are needed only for the tools that are linked from the
-root node. Since a tool can't have multiple output languages, for inner nodes of
-the graph the input and output languages should match. This is enforced at
-compile-time.
+root node. A tool can have multiple output languages.
 
 Option preprocessor
 ===================
@@ -686,7 +614,7 @@ implemented as switches, we might want to output a warning if the user invokes
 the driver with both of these options enabled.
 
 The ``OptionPreprocessor`` feature is reserved specially for these
-occasions. Example (adapted from the built-in Base plugin)::
+occasions. Example (adapted from ``llvm/src/Base.td.in``)::
 
 
     def Preprocess : OptionPreprocessor<
@@ -705,7 +633,7 @@ that they are not forwarded to the compiler. If no optimization options are
 specified, ``-O2`` is enabled.
 
 ``OptionPreprocessor`` is basically a single big ``case`` expression, which is
-evaluated only once right after the plugin is loaded. The only allowed actions
+evaluated only once right after the driver is started. The only allowed actions
 in ``OptionPreprocessor`` are ``error``, ``warning``, and two special actions:
 ``unset_option`` and ``set_option``. As their names suggest, they can be used to
 set or unset a given option. To set an option with ``set_option``, use the
@@ -726,30 +654,28 @@ More advanced topics
 Hooks and environment variables
 -------------------------------
 
-Normally, LLVMC executes programs from the system ``PATH``. Sometimes,
-this is not sufficient: for example, we may want to specify tool paths
-or names in the configuration file. This can be easily achieved via
-the hooks mechanism. To write your own hooks, just add their
-definitions to the ``PluginMain.cpp`` or drop a ``.cpp`` file into the
-your plugin directory. Hooks should live in the ``hooks`` namespace
-and have the signature ``std::string hooks::MyHookName ([const char*
-Arg0 [ const char* Arg2 [, ...]]])``. They can be used from the
-``cmd_line`` tool property::
+Normally, LLVMC searches for programs in the system ``PATH``. Sometimes, this is
+not sufficient: for example, we may want to specify tool paths or names in the
+configuration file. This can be achieved via the hooks mechanism. To write your
+own hooks, add their definitions to the ``Hooks.cpp`` or drop a ``.cpp`` file
+into your driver directory. Hooks should live in the ``hooks`` namespace and
+have the signature ``std::string hooks::MyHookName ([const char* Arg0 [ const
+char* Arg2 [, ...]]])``. They can be used from the ``command`` tool property::
 
-    (cmd_line "$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)")
+    (command "$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)")
 
 To pass arguments to hooks, use the following syntax::
 
-    (cmd_line "$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2")
+    (command "$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2")
 
 It is also possible to use environment variables in the same manner::
 
-   (cmd_line "$ENV(VAR1)/path/to/file -o $ENV(VAR2)")
+   (command "$ENV(VAR1)/path/to/file -o $ENV(VAR2)")
 
 To change the command line string based on user-provided options use
 the ``case`` expression (documented `above`__)::
 
-    (cmd_line
+    (command
       (case
         (switch_on "E"),
            "llvm-g++ -E -x c $INFILE -o $OUTFILE",
@@ -758,42 +684,21 @@ the ``case`` expression (documented `above`__)::
 
 __ case_
 
-.. _priorities:
-
-How plugins are loaded
-----------------------
-
-It is possible for LLVMC plugins to depend on each other. For example,
-one can create edges between nodes defined in some other plugin. To
-make this work, however, that plugin should be loaded first. To
-achieve this, the concept of plugin priority was introduced. By
-default, every plugin has priority zero; to specify the priority
-explicitly, put the following line in your plugin's TableGen file::
-
-    def Priority : PluginPriority<$PRIORITY_VALUE>;
-    # Where PRIORITY_VALUE is some integer > 0
-
-Plugins are loaded in order of their (increasing) priority, starting
-with 0. Therefore, the plugin with the highest priority value will be
-loaded last.
-
 Debugging
 ---------
 
-When writing LLVMC plugins, it can be useful to get a visual view of
-the resulting compilation graph. This can be achieved via the command
-line option ``--view-graph``. This command assumes that Graphviz_ and
-Ghostview_ are installed. There is also a ``--write-graph`` option that
-creates a Graphviz source file (``compilation-graph.dot``) in the
-current directory.
-
-Another useful ``llvmc`` option is ``--check-graph``. It checks the
-compilation graph for common errors like mismatched output/input
-language names, multiple default edges and cycles. These checks can't
-be performed at compile-time because the plugins can load code
-dynamically. When invoked with ``--check-graph``, ``llvmc`` doesn't
-perform any compilation tasks and returns the number of encountered
-errors as its status code.
+When writing LLVMC-based drivers, it can be useful to get a visual view of the
+resulting compilation graph. This can be achieved via the command line option
+``--view-graph`` (which assumes that Graphviz_ and Ghostview_ are
+installed). There is also a ``--write-graph`` option that creates a Graphviz
+source file (``compilation-graph.dot``) in the current directory.
+
+Another useful ``llvmc`` option is ``--check-graph``. It checks the compilation
+graph for common errors like mismatched output/input language names, multiple
+default edges and cycles. When invoked with ``--check-graph``, ``llvmc`` doesn't
+perform any compilation tasks and returns the number of encountered errors as
+its status code. In the future, these checks will be performed at compile-time
+and this option will disappear.
 
 .. _Graphviz: http://www.graphviz.org/
 .. _Ghostview: http://pages.cs.wisc.edu/~ghost/
@@ -821,7 +726,7 @@ accessible only in the C++ code (i.e. hooks). Use the following code::
 
 In general, you're encouraged not to make the behaviour dependent on the
 executable file name, and use command-line switches instead. See for example how
-the ``Base`` plugin behaves when it needs to choose the correct linker options
+the ``llvmc`` program behaves when it needs to choose the correct linker options
 (think ``g++`` vs. ``gcc``).
 
 .. raw:: html
diff --git a/tools/llvmc/doc/LLVMC-Tutorial.rst b/tools/llvmc/doc/LLVMC-Tutorial.rst
index e7e8f08..fc4c124 100644
--- a/tools/llvmc/doc/LLVMC-Tutorial.rst
+++ b/tools/llvmc/doc/LLVMC-Tutorial.rst
@@ -17,59 +17,54 @@ Tutorial - Using LLVMC
 Introduction
 ============
 
-LLVMC is a generic compiler driver, which plays the same role for LLVM
-as the ``gcc`` program does for GCC - the difference being that LLVMC
-is designed to be more adaptable and easier to customize. Most of
-LLVMC functionality is implemented via plugins, which can be loaded
-dynamically or compiled in. This tutorial describes the basic usage
-and configuration of LLVMC.
+LLVMC is a generic compiler driver, which plays the same role for LLVM as the
+``gcc`` program does for GCC - the difference being that LLVMC is designed to be
+more adaptable and easier to customize. Most of LLVMC functionality is
+implemented via high-level TableGen code, from which a corresponding C++ source
+file is automatically generated. This tutorial describes the basic usage and
+configuration of LLVMC.
 
 
-Compiling with LLVMC
-====================
+Using the ``llvmc`` program
+===========================
 
-In general, LLVMC tries to be command-line compatible with ``gcc`` as
-much as possible, so most of the familiar options work::
+In general, ``llvmc`` tries to be command-line compatible with ``gcc`` as much
+as possible, so most of the familiar options work::
 
      $ llvmc -O3 -Wall hello.cpp
      $ ./a.out
      hello
 
-This will invoke ``llvm-g++`` under the hood (you can see which
-commands are executed by using the ``-v`` option). For further help on
-command-line LLVMC usage, refer to the ``llvmc --help`` output.
+This will invoke ``llvm-g++`` under the hood (you can see which commands are
+executed by using the ``-v`` option). For further help on command-line LLVMC
+usage, refer to the ``llvmc --help`` output.
 
 
 Using LLVMC to generate toolchain drivers
 =========================================
 
-LLVMC plugins are written mostly using TableGen_, so you need to
-be familiar with it to get anything done.
+LLVMC-based drivers are written mostly using TableGen_, so you need to be
+familiar with it to get anything done.
 
 .. _TableGen: http://llvm.org/docs/TableGenFundamentals.html
 
 Start by compiling ``example/Simple``, which is a primitive wrapper for
 ``gcc``::
 
-    $ cd $LLVM_DIR/tools/llvmc
-    $ cp -r example/Simple plugins/Simple
-
-      # NB: A less verbose way to compile standalone LLVMC-based drivers is
-      # described in the reference manual.
-
-    $ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
+    $ cd $LLVM_OBJ_DIR/tools/examples/Simple
+    $ make
     $ cat > hello.c
-    [...]
-    $ mygcc hello.c
+    #include <stdio.h>
+    int main() { printf("Hello\n"); }
+    $ $LLVM_BIN_DIR/Simple -v hello.c
+    gcc hello.c -o hello.out
     $ ./hello.out
     Hello
 
-Here we link our plugin with the LLVMC core statically to form an executable
-file called ``mygcc``. It is also possible to build our plugin as a dynamic
-library to be loaded by the ``llvmc`` executable (or any other LLVMC-based
-standalone driver); this is described in the reference manual.
-
-Contents of the file ``Simple.td`` look like this::
+We have thus produced a simple driver called, appropriately, ``Simple``, from
+the input TableGen file ``Simple.td``. The ``llvmc`` program itself is generated
+using a similar process (see ``llvmc/src``). Contents of the file ``Simple.td``
+look like this::
 
     // Include common definitions
     include "llvm/CompilerDriver/Common.td"
@@ -79,37 +74,40 @@ Contents of the file ``Simple.td`` look like this::
     [(in_language "c"),
      (out_language "executable"),
      (output_suffix "out"),
-     (cmd_line "gcc $INFILE -o $OUTFILE"),
-     (sink)
+     (command "gcc"),
+     (sink),
+
+     // -o is what is used by default, out_file_option here is included for
+     // instructive purposes.
+     (out_file_option "-o")
     ]>;
 
     // Language map
-    def LanguageMap : LanguageMap<[LangToSuffixes<"c", ["c"]>]>;
+    def LanguageMap : LanguageMap<[(lang_to_suffixes "c", "c")]>;
 
     // Compilation graph
-    def CompilationGraph : CompilationGraph<[Edge<"root", "gcc">]>;
+    def CompilationGraph : CompilationGraph<[(edge "root", "gcc")]>;
 
-As you can see, this file consists of three parts: tool descriptions,
-language map, and the compilation graph definition.
+As you can see, this file consists of three parts: tool descriptions, language
+map, and the compilation graph definition.
 
-At the heart of LLVMC is the idea of a compilation graph: vertices in
-this graph are tools, and edges represent a transformation path
-between two tools (for example, assembly source produced by the
-compiler can be transformed into executable code by an assembler). The
-compilation graph is basically a list of edges; a special node named
-``root`` is used to mark graph entry points.
+At the heart of LLVMC is the idea of a compilation graph: vertices in this graph
+are tools, and edges represent a transformation path between two tools (for
+example, assembly source produced by the compiler can be transformed into
+executable code by an assembler). The compilation graph is basically a list of
+edges; a special node named ``root`` is used to mark graph entry points.
 
-Tool descriptions are represented as property lists: most properties
-in the example above should be self-explanatory; the ``sink`` property
-means that all options lacking an explicit description should be
-forwarded to this tool.
+Tool descriptions are represented as property lists: most properties in the
+example above should be self-explanatory; the ``sink`` property means that all
+options lacking an explicit description should be forwarded to this tool.
 
-The ``LanguageMap`` associates a language name with a list of suffixes
-and is used for deciding which toolchain corresponds to a given input
-file.
+The ``LanguageMap`` associates a language name with a list of suffixes and is
+used for deciding which toolchain corresponds to a given input file.
 
-To learn more about LLVMC customization, refer to the reference
-manual and plugin source code in the ``plugins`` directory.
+To learn more about writing your own drivers with LLVMC, refer to the reference
+manual and examples in the ``examples`` directory. Of a particular interest is
+the ``Skeleton`` example, which can serve as a template for your LLVMC-based
+drivers.
 
 .. raw:: html
 
diff --git a/tools/llvmc/examples/Skeleton/README b/tools/llvmc/examples/Skeleton/README
index 61ff6fb..282ee15 100644
--- a/tools/llvmc/examples/Skeleton/README
+++ b/tools/llvmc/examples/Skeleton/README
@@ -1,6 +1,6 @@
 
 This is a template that can be used to create your own LLVMC-based drivers. Just
 copy the `Skeleton` directory to the location of your preference and edit
-`Skeleton/Makefile` and `Skeleton/AutoGenerated.inc`.
+`Skeleton/Makefile` and `Skeleton/AutoGenerated.td`.
 
 The build system assumes that your project is based on LLVM.
diff --git a/tools/llvmc/src/Base.td.in b/tools/llvmc/src/Base.td.in
index 50533f1..527913c 100644
--- a/tools/llvmc/src/Base.td.in
+++ b/tools/llvmc/src/Base.td.in
@@ -191,7 +191,7 @@ class llvm_gcc_based <string cmd, string in_lang, string E_ext, dag out_lang,
          // ('-S' && '-emit-llvm') && !('opt') -> output .ll
          (and (switch_on "emit-llvm", "S"), (not (switch_on "opt"))),
               [(forward "S"), (output_suffix "ll")],
-         // Ususally just output .bc
+         // Usually just output .bc
          (not (switch_on "fsyntax-only")),
               [(append_cmd "-c"), (append_cmd "-emit-llvm")],
 
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
new file mode 100644
index 0000000..7e2c5f0
--- /dev/null
+++ b/tools/lto/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  ipo scalaropts linker bitreader bitwriter mcdisassembler)
+
+add_definitions( -DLLVM_VERSION_INFO=\"${PACKAGE_VERSION}\" )
+
+set(SOURCES
+  LTOCodeGenerator.cpp
+  lto.cpp
+  LTOModule.cpp
+  )
+
+if( NOT WIN32 AND LLVM_ENABLE_PIC )
+  set(bsl ${BUILD_SHARED_LIBS})
+  set(BUILD_SHARED_LIBS ON)
+  add_llvm_library(LTO ${SOURCES})
+  set_property(TARGET LTO PROPERTY OUTPUT_NAME "LTO")
+  set(BUILD_SHARED_LIBS ${bsl})
+  set(LTO_STATIC_TARGET_NAME LTO_static)
+else()
+  set(LTO_STATIC_TARGET_NAME LTO)
+endif()
+
+if( NOT BUILD_SHARED_LIBS )
+  add_llvm_library(${LTO_STATIC_TARGET_NAME} ${SOURCES})
+  set_property(TARGET ${LTO_STATIC_TARGET_NAME} PROPERTY OUTPUT_NAME "LTO")
+endif()
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index f72fdb0..d95f354 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -75,7 +75,6 @@ LTOCodeGenerator::LTOCodeGenerator()
 {
     InitializeAllTargets();
     InitializeAllAsmPrinters();
-    InitializeAllAsmParsers();
 }
 
 LTOCodeGenerator::~LTOCodeGenerator()
@@ -88,7 +87,17 @@ LTOCodeGenerator::~LTOCodeGenerator()
 
 bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg)
 {
-    return _linker.LinkInModule(mod->getLLVVMModule(), &errMsg);
+
+  if(mod->getLLVVMModule()->MaterializeAllPermanently(&errMsg))
+    return true;
+
+  bool ret = _linker.LinkInModule(mod->getLLVVMModule(), &errMsg);
+
+  const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
+  for (int i = 0, e = undefs.size(); i != e; ++i)
+    _asmUndefinedRefs[undefs[i]] = 1;
+
+  return ret;
 }
     
 
@@ -167,51 +176,63 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
 }
 
 
-const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
+bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg)
 {
-    // make unique temp .o file to put generated object file
-    sys::PathWithStatus uniqueObjPath("lto-llvm.o");
-    if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) {
-        uniqueObjPath.eraseFromDisk();
-        return NULL;
-    }
-    sys::RemoveFileOnSignal(uniqueObjPath);
-
-    // generate object file
-    bool genResult = false;
-    tool_output_file objFile(uniqueObjPath.c_str(), errMsg);
-    if (!errMsg.empty())
-      return NULL;
-    genResult = this->generateObjectFile(objFile.os(), errMsg);
-    objFile.os().close();
-    if (objFile.os().has_error()) {
-      objFile.os().clear_error();
-      return NULL;
-    }
-    objFile.keep();
-    if ( genResult ) {
-      uniqueObjPath.eraseFromDisk();
-      return NULL;
-    }
+  // make unique temp .o file to put generated object file
+  sys::PathWithStatus uniqueObjPath("lto-llvm.o");
+  if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) {
+    uniqueObjPath.eraseFromDisk();
+    return true;
+  }
+  sys::RemoveFileOnSignal(uniqueObjPath);
+
+  // generate object file
+  bool genResult = false;
+  tool_output_file objFile(uniqueObjPath.c_str(), errMsg);
+  if (!errMsg.empty())
+    return NULL;
+  genResult = this->generateObjectFile(objFile.os(), errMsg);
+  objFile.os().close();
+  if (objFile.os().has_error()) {
+    objFile.os().clear_error();
+    return true;
+  }
+  objFile.keep();
+  if ( genResult ) {
+    uniqueObjPath.eraseFromDisk();
+    return true;
+  }
 
-    const std::string& uniqueObjStr = uniqueObjPath.str();
-    // remove old buffer if compile() called twice
-    delete _nativeObjectFile;
+  _nativeObjectPath = uniqueObjPath.str();
+  *name = _nativeObjectPath.c_str();
+  return false;
+}
 
-    // read .o file into memory buffer
-    OwningPtr<MemoryBuffer> BuffPtr;
-    if (error_code ec = MemoryBuffer::getFile(uniqueObjStr.c_str(),BuffPtr))
-      errMsg = ec.message();
-    _nativeObjectFile = BuffPtr.take();
+const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
+{
+  const char *name;
+  if (compile_to_file(&name, errMsg))
+    return NULL;
+
+  // remove old buffer if compile() called twice
+  delete _nativeObjectFile;
+
+  // read .o file into memory buffer
+  OwningPtr<MemoryBuffer> BuffPtr;
+  if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) {
+    errMsg = ec.message();
+    return NULL;
+  }
+  _nativeObjectFile = BuffPtr.take();
 
-    // remove temp files
-    uniqueObjPath.eraseFromDisk();
+  // remove temp files
+  sys::Path(_nativeObjectPath).eraseFromDisk();
 
-    // return buffer, unless error
-    if ( _nativeObjectFile == NULL )
-        return NULL;
-    *length = _nativeObjectFile->getBufferSize();
-    return _nativeObjectFile->getBufferStart();
+  // return buffer, unless error
+  if ( _nativeObjectFile == NULL )
+    return NULL;
+  *length = _nativeObjectFile->getBufferSize();
+  return _nativeObjectFile->getBufferStart();
 }
 
 bool LTOCodeGenerator::determineTarget(std::string& errMsg)
@@ -249,6 +270,34 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
     return false;
 }
 
+void LTOCodeGenerator::applyRestriction(GlobalValue &GV,
+                                     std::vector<const char*> &mustPreserveList,
+                                        SmallPtrSet<GlobalValue*, 8> &asmUsed,
+                                        Mangler &mangler) {
+  SmallString<64> Buffer;
+  mangler.getNameWithPrefix(Buffer, &GV, false);
+
+  if (GV.isDeclaration())
+    return;
+  if (_mustPreserveSymbols.count(Buffer))
+    mustPreserveList.push_back(GV.getName().data());
+  if (_asmUndefinedRefs.count(Buffer))
+    asmUsed.insert(&GV);
+}
+
+static void findUsedValues(GlobalVariable *LLVMUsed,
+                           SmallPtrSet<GlobalValue*, 8> &UsedValues) {
+  if (LLVMUsed == 0) return;
+
+  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+  if (Inits == 0) return;
+
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+    if (GlobalValue *GV = 
+          dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+      UsedValues.insert(GV);
+}
+
 void LTOCodeGenerator::applyScopeRestrictions() {
   if (_scopeRestrictionsDone) return;
   Module *mergedModule = _linker.getModule();
@@ -258,38 +307,47 @@ void LTOCodeGenerator::applyScopeRestrictions() {
   passes.add(createVerifierPass());
 
   // mark which symbols can not be internalized 
-  if (!_mustPreserveSymbols.empty()) {
-    MCContext Context(*_target->getMCAsmInfo(), NULL);
-    Mangler mangler(Context, *_target->getTargetData());
-    std::vector<const char*> mustPreserveList;
-    SmallString<64> Buffer;
-    for (Module::iterator f = mergedModule->begin(),
-         e = mergedModule->end(); f != e; ++f) {
-      Buffer.clear();
-      mangler.getNameWithPrefix(Buffer, f, false);
-      if (!f->isDeclaration() &&
-          _mustPreserveSymbols.count(Buffer))
-        mustPreserveList.push_back(f->getName().data());
-    }
-    for (Module::global_iterator v = mergedModule->global_begin(), 
-         e = mergedModule->global_end(); v !=  e; ++v) {
-      Buffer.clear();
-      mangler.getNameWithPrefix(Buffer, v, false);
-      if (!v->isDeclaration() &&
-          _mustPreserveSymbols.count(Buffer))
-        mustPreserveList.push_back(v->getName().data());
-    }
-    for (Module::alias_iterator a = mergedModule->alias_begin(),
-         e = mergedModule->alias_end(); a != e; ++a) {
-      Buffer.clear();
-      mangler.getNameWithPrefix(Buffer, a, false);
-      if (!a->isDeclaration() &&
-          _mustPreserveSymbols.count(Buffer))
-        mustPreserveList.push_back(a->getName().data());
-    }
-    passes.add(createInternalizePass(mustPreserveList));
+  MCContext Context(*_target->getMCAsmInfo(), NULL);
+  Mangler mangler(Context, *_target->getTargetData());
+  std::vector<const char*> mustPreserveList;
+  SmallPtrSet<GlobalValue*, 8> asmUsed;
+
+  for (Module::iterator f = mergedModule->begin(),
+         e = mergedModule->end(); f != e; ++f)
+    applyRestriction(*f, mustPreserveList, asmUsed, mangler);
+  for (Module::global_iterator v = mergedModule->global_begin(), 
+         e = mergedModule->global_end(); v !=  e; ++v)
+    applyRestriction(*v, mustPreserveList, asmUsed, mangler);
+  for (Module::alias_iterator a = mergedModule->alias_begin(),
+         e = mergedModule->alias_end(); a != e; ++a)
+    applyRestriction(*a, mustPreserveList, asmUsed, mangler);
+
+  GlobalVariable *LLVMCompilerUsed =
+    mergedModule->getGlobalVariable("llvm.compiler.used");
+  findUsedValues(LLVMCompilerUsed, asmUsed);
+  if (LLVMCompilerUsed)
+    LLVMCompilerUsed->eraseFromParent();
+
+  const llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(_context);
+  std::vector<Constant*> asmUsed2;
+  for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = asmUsed.begin(),
+         e = asmUsed.end(); i !=e; ++i) {
+    GlobalValue *GV = *i;
+    Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
+    asmUsed2.push_back(c);
   }
-  
+
+  llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
+  LLVMCompilerUsed =
+    new llvm::GlobalVariable(*mergedModule, ATy, false,
+                             llvm::GlobalValue::AppendingLinkage,
+                             llvm::ConstantArray::get(ATy, asmUsed2),
+                             "llvm.compiler.used");
+
+  LLVMCompilerUsed->setSection("llvm.metadata");
+
+  passes.add(createInternalizePass(mustPreserveList));
+
   // apply scope restrictions
   passes.run(*mergedModule);
   
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 0556520..f8fd357 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -19,6 +19,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 #include <string>
 
@@ -40,12 +41,17 @@ struct LTOCodeGenerator {
     void                addMustPreserveSymbol(const char* sym);
     bool                writeMergedModules(const char* path, 
                                                            std::string& errMsg);
+    bool                compile_to_file(const char** name, std::string& errMsg);
     const void*         compile(size_t* length, std::string& errMsg);
     void                setCodeGenDebugOptions(const char *opts); 
 private:
     bool                generateObjectFile(llvm::raw_ostream& out, 
                                            std::string& errMsg);
     void                applyScopeRestrictions();
+    void                applyRestriction(llvm::GlobalValue &GV,
+                                     std::vector<const char*> &mustPreserveList,
+                        llvm::SmallPtrSet<llvm::GlobalValue*, 8> &asmUsed,
+                                         llvm::Mangler &mangler);
     bool                determineTarget(std::string& errMsg);
     
     typedef llvm::StringMap<uint8_t> StringSet;
@@ -57,9 +63,11 @@ private:
     bool                        _scopeRestrictionsDone;
     lto_codegen_model           _codeModel;
     StringSet                   _mustPreserveSymbols;
+    StringSet                   _asmUndefinedRefs;
     llvm::MemoryBuffer*         _nativeObjectFile;
     std::vector<const char*>    _codegenOptions;
     std::string                 _mCpu;
+    std::string                 _nativeObjectPath;
 };
 
 #endif // LTO_CODE_GENERATOR_H
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 1eac22c..8f2b1f4 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -26,11 +26,18 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/system_error.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/SubtargetFeature.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmParser.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetSelect.h"
@@ -73,7 +80,7 @@ bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
 
 
 LTOModule::LTOModule(Module *m, TargetMachine *t)
-  : _module(m), _target(t), _symbolsParsed(false)
+  : _module(m), _target(t)
 {
 }
 
@@ -84,32 +91,33 @@ LTOModule *LTOModule::makeLTOModule(const char *path,
     errMsg = ec.message();
     return NULL;
   }
-  return makeLTOModule(buffer.get(), errMsg);
+  return makeLTOModule(buffer.take(), errMsg);
 }
 
 LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
-                                    off_t size,
+                                    size_t size,
+                                    std::string &errMsg) {
+  return makeLTOModule(fd, path, size, size, 0, errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
+                                    size_t file_size,
+                                    size_t map_size,
+                                    off_t offset,
                                     std::string &errMsg) {
   OwningPtr<MemoryBuffer> buffer;
-  if (error_code ec = MemoryBuffer::getOpenFile(fd, path, buffer, size)) {
+  if (error_code ec = MemoryBuffer::getOpenFile(fd, path, buffer, file_size,
+                                                map_size, offset, false)) {
     errMsg = ec.message();
     return NULL;
   }
-  return makeLTOModule(buffer.get(), errMsg);
+  return makeLTOModule(buffer.take(), errMsg);
 }
 
-/// makeBuffer - Create a MemoryBuffer from a memory range.  MemoryBuffer
-/// requires the byte past end of the buffer to be a zero.  We might get lucky
-/// and already be that way, otherwise make a copy.  Also if next byte is on a
-/// different page, don't assume it is readable.
+/// makeBuffer - Create a MemoryBuffer from a memory range.
 MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
   const char *startPtr = (char*)mem;
-  const char *endPtr = startPtr+length;
-  if (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0 ||
-      *endPtr != 0)
-    return MemoryBuffer::getMemBufferCopy(StringRef(startPtr, length));
-
-  return MemoryBuffer::getMemBuffer(StringRef(startPtr, length));
+  return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false);
 }
 
 
@@ -118,17 +126,25 @@ LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
   OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
   if (!buffer)
     return NULL;
-  return makeLTOModule(buffer.get(), errMsg);
+  return makeLTOModule(buffer.take(), errMsg);
 }
 
 LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
                                     std::string &errMsg) {
-  InitializeAllTargets();
+  static bool Initialized = false;
+  if (!Initialized) {
+    InitializeAllTargets();
+    InitializeAllAsmParsers();
+    Initialized = true;
+  }
 
   // parse bitcode buffer
-  OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), &errMsg));
-  if (!m)
+  OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext(),
+                                           &errMsg));
+  if (!m) {
+    delete buffer;
     return NULL;
+  }
 
   std::string Triple = m->getTargetTriple();
   if (Triple.empty())
@@ -139,12 +155,18 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
   if (!march)
     return NULL;
 
-  // construct LTModule, hand over ownership of module and target
+  // construct LTOModule, hand over ownership of module and target
   SubtargetFeatures Features;
   Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
   std::string FeatureStr = Features.getString();
   TargetMachine *target = march->createTargetMachine(Triple, FeatureStr);
-  return new LTOModule(m.take(), target);
+  LTOModule *Ret = new LTOModule(m.take(), target);
+  bool Err = Ret->ParseSymbols();
+  if (Err) {
+    delete Ret;
+    return NULL;
+  }
+  return Ret;
 }
 
 
@@ -159,16 +181,6 @@ void LTOModule::setTargetTriple(const char *triple) {
 void LTOModule::addDefinedFunctionSymbol(Function *f, Mangler &mangler) {
   // add to list of defined symbols
   addDefinedSymbol(f, mangler, true);
-
-  // add external symbols referenced by this function.
-  for (Function::iterator b = f->begin(); b != f->end(); ++b) {
-    for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
-      for (unsigned count = 0, total = i->getNumOperands();
-           count != total; ++count) {
-        findExternalRefs(i->getOperand(count), mangler);
-      }
-    }
-  }
 }
 
 // Get string that data pointer points to.
@@ -309,12 +321,6 @@ void LTOModule::addDefinedDataSymbol(GlobalValue *v, Mangler &mangler) {
       }
     }
   }
-
-  // add external symbols referenced by this data.
-  for (unsigned count = 0, total = v->getNumOperands();
-       count != total; ++count) {
-    findExternalRefs(v->getOperand(count), mangler);
-  }
 }
 
 
@@ -324,10 +330,6 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
   if (def->getName().startswith("llvm."))
     return;
 
-  // ignore available_externally
-  if (def->hasAvailableExternallyLinkage())
-    return;
-
   // string is owned by _defines
   SmallString<64> Buffer;
   mangler.getNameWithPrefix(Buffer, def, false);
@@ -383,7 +385,8 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
   _symbols.push_back(info);
 }
 
-void LTOModule::addAsmGlobalSymbol(const char *name) {
+void LTOModule::addAsmGlobalSymbol(const char *name,
+                                   lto_symbol_attributes scope) {
   StringSet::value_type &entry = _defines.GetOrCreateValue(name);
 
   // only add new define if not already defined
@@ -393,13 +396,32 @@ void LTOModule::addAsmGlobalSymbol(const char *name) {
   entry.setValue(1);
   const char *symbolName = entry.getKey().data();
   uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR;
-  attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+  attr |= scope;
   NameAndAttributes info;
   info.name = symbolName;
   info.attributes = (lto_symbol_attributes)attr;
   _symbols.push_back(info);
 }
 
+void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
+  StringMap<NameAndAttributes>::value_type &entry =
+    _undefines.GetOrCreateValue(name);
+
+  _asm_undefines.push_back(entry.getKey().data());
+
+  // we already have the symbol
+  if (entry.getValue().name)
+    return;
+
+  uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;;
+  attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+  NameAndAttributes info;
+  info.name = entry.getKey().data();
+  info.attributes = (lto_symbol_attributes)attr;
+
+  entry.setValue(info);
+}
+
 void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
                                             Mangler &mangler) {
   // ignore all llvm.* symbols
@@ -432,41 +454,211 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
 }
 
 
+namespace {
+  class RecordStreamer : public MCStreamer {
+  public:
+    enum State { NeverSeen, Global, Defined, DefinedGlobal, Used};
+
+  private:
+    StringMap<State> Symbols;
+
+    void markDefined(const MCSymbol &Symbol) {
+      State &S = Symbols[Symbol.getName()];
+      switch (S) {
+      case DefinedGlobal:
+      case Global:
+        S = DefinedGlobal;
+        break;
+      case NeverSeen:
+      case Defined:
+      case Used:
+        S = Defined;
+        break;
+      }
+    }
+    void markGlobal(const MCSymbol &Symbol) {
+      State &S = Symbols[Symbol.getName()];
+      switch (S) {
+      case DefinedGlobal:
+      case Defined:
+        S = DefinedGlobal;
+        break;
+
+      case NeverSeen:
+      case Global:
+      case Used:
+        S = Global;
+        break;
+      }
+    }
+    void markUsed(const MCSymbol &Symbol) {
+      State &S = Symbols[Symbol.getName()];
+      switch (S) {
+      case DefinedGlobal:
+      case Defined:
+      case Global:
+        break;
+
+      case NeverSeen:
+      case Used:
+        S = Used;
+        break;
+      }
+    }
+
+    // FIXME: mostly copied for the obj streamer.
+    void AddValueSymbols(const MCExpr *Value) {
+      switch (Value->getKind()) {
+      case MCExpr::Target:
+        // FIXME: What should we do in here?
+        break;
+
+      case MCExpr::Constant:
+        break;
+
+      case MCExpr::Binary: {
+        const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+        AddValueSymbols(BE->getLHS());
+        AddValueSymbols(BE->getRHS());
+        break;
+      }
+
+      case MCExpr::SymbolRef:
+        markUsed(cast<MCSymbolRefExpr>(Value)->getSymbol());
+        break;
 
-// Find external symbols referenced by VALUE. This is a recursive function.
-void LTOModule::findExternalRefs(Value *value, Mangler &mangler) {
-  if (GlobalValue *gv = dyn_cast<GlobalValue>(value)) {
-    if (!gv->hasExternalLinkage())
-      addPotentialUndefinedSymbol(gv, mangler);
-    // If this is a variable definition, do not recursively process
-    // initializer.  It might contain a reference to this variable
-    // and cause an infinite loop.  The initializer will be
-    // processed in addDefinedDataSymbol().
-    return;
-  }
+      case MCExpr::Unary:
+        AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+        break;
+      }
+    }
+
+  public:
+    typedef StringMap<State>::const_iterator const_iterator;
+
+    const_iterator begin() {
+      return Symbols.begin();
+    }
 
-  // GlobalValue, even with InternalLinkage type, may have operands with
-  // ExternalLinkage type. Do not ignore these operands.
-  if (Constant *c = dyn_cast<Constant>(value)) {
-    // Handle ConstantExpr, ConstantStruct, ConstantArry etc.
-    for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i)
-      findExternalRefs(c->getOperand(i), mangler);
+    const_iterator end() {
+      return Symbols.end();
+    }
+
+    RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
+
+    virtual void ChangeSection(const MCSection *Section) {}
+    virtual void InitSections() {}
+    virtual void EmitLabel(MCSymbol *Symbol) {
+      Symbol->setSection(*getCurrentSection());
+      markDefined(*Symbol);
+    }
+    virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+    virtual void EmitThumbFunc(MCSymbol *Func) {}
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+      // FIXME: should we handle aliases?
+      markDefined(*Symbol);
+    }
+    virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+      if (Attribute == MCSA_Global)
+        markGlobal(*Symbol);
+    }
+    virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+    virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
+    virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+    virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+    virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                              unsigned Size , unsigned ByteAlignment) {
+      markDefined(*Symbol);
+    }
+    virtual void EmitCOFFSymbolType(int Type) {}
+    virtual void EndCOFFSymbolDef() {}
+    virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                  unsigned ByteAlignment) {
+      markDefined(*Symbol);
+    }
+    virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+    virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+    virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                uint64_t Size, unsigned ByteAlignment) {}
+    virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
+    virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                               unsigned AddrSpace) {}
+    virtual void EmitULEB128Value(const MCExpr *Value) {}
+    virtual void EmitSLEB128Value(const MCExpr *Value) {}
+    virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+                                      unsigned ValueSize,
+                                      unsigned MaxBytesToEmit) {}
+    virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                   unsigned MaxBytesToEmit) {}
+    virtual void EmitValueToOffset(const MCExpr *Offset,
+                                   unsigned char Value ) {}
+    virtual void EmitFileDirective(StringRef Filename) {}
+    virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                          const MCSymbol *LastLabel,
+                                        const MCSymbol *Label) {}
+
+    virtual void EmitInstruction(const MCInst &Inst) {
+      // Scan for values.
+      for (unsigned i = Inst.getNumOperands(); i--; )
+        if (Inst.getOperand(i).isExpr())
+          AddValueSymbols(Inst.getOperand(i).getExpr());
+    }
+    virtual void Finish() {}
+  };
+}
+
+bool LTOModule::addAsmGlobalSymbols(MCContext &Context) {
+  const std::string &inlineAsm = _module->getModuleInlineAsm();
+
+  OwningPtr<RecordStreamer> Streamer(new RecordStreamer(Context));
+  MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm);
+  SourceMgr SrcMgr;
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+  OwningPtr<MCAsmParser> Parser(createMCAsmParser(_target->getTarget(), SrcMgr,
+                                                  Context, *Streamer,
+                                                  *_target->getMCAsmInfo()));
+  OwningPtr<TargetAsmParser>
+    TAP(_target->getTarget().createAsmParser(*Parser.get(), *_target.get()));
+  Parser->setTargetParser(*TAP);
+  int Res = Parser->Run(false);
+  if (Res)
+    return true;
+
+  for (RecordStreamer::const_iterator i = Streamer->begin(),
+         e = Streamer->end(); i != e; ++i) {
+    StringRef Key = i->first();
+    RecordStreamer::State Value = i->second;
+    if (Value == RecordStreamer::DefinedGlobal)
+      addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT);
+    else if (Value == RecordStreamer::Defined)
+      addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL);
+    else if (Value == RecordStreamer::Global ||
+             Value == RecordStreamer::Used)
+      addAsmGlobalSymbolUndef(Key.data());
   }
+  return false;
 }
 
-void LTOModule::lazyParseSymbols() {
-  if (_symbolsParsed)
-    return;
+static bool isDeclaration(const GlobalValue &V) {
+  if (V.hasAvailableExternallyLinkage())
+    return true;
+  if (V.isMaterializable())
+    return false;
+  return V.isDeclaration();
+}
 
-  _symbolsParsed = true;
+static bool isAliasToDeclaration(const GlobalAlias &V) {
+  return isDeclaration(*V.getAliasedGlobal());
+}
 
+bool LTOModule::ParseSymbols() {
   // Use mangler to add GlobalPrefix to names to match linker names.
   MCContext Context(*_target->getMCAsmInfo(), NULL);
   Mangler mangler(Context, *_target->getTargetData());
 
   // add functions
   for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
-    if (f->isDeclaration())
+    if (isDeclaration(*f))
       addPotentialUndefinedSymbol(f, mangler);
     else
       addDefinedFunctionSymbol(f, mangler);
@@ -475,42 +667,20 @@ void LTOModule::lazyParseSymbols() {
   // add data
   for (Module::global_iterator v = _module->global_begin(),
          e = _module->global_end(); v !=  e; ++v) {
-    if (v->isDeclaration())
+    if (isDeclaration(*v))
       addPotentialUndefinedSymbol(v, mangler);
     else
       addDefinedDataSymbol(v, mangler);
   }
 
   // add asm globals
-  const std::string &inlineAsm = _module->getModuleInlineAsm();
-  const std::string glbl = ".globl";
-  std::string asmSymbolName;
-  std::string::size_type pos = inlineAsm.find(glbl, 0);
-  while (pos != std::string::npos) {
-    // eat .globl
-    pos = pos + 6;
-
-    // skip white space between .globl and symbol name
-    std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos);
-    if (pbegin == std::string::npos)
-      break;
-
-    // find end-of-line
-    std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin);
-    if (pend == std::string::npos)
-      break;
-
-    asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin);
-    addAsmGlobalSymbol(asmSymbolName.c_str());
-
-    // search next .globl
-    pos = inlineAsm.find(glbl, pend);
-  }
+  if (addAsmGlobalSymbols(Context))
+    return true;
 
   // add aliases
   for (Module::alias_iterator i = _module->alias_begin(),
          e = _module->alias_end(); i != e; ++i) {
-    if (i->isDeclaration())
+    if (isAliasToDeclaration(*i))
       addPotentialUndefinedSymbol(i, mangler);
     else
       addDefinedDataSymbol(i, mangler);
@@ -526,17 +696,16 @@ void LTOModule::lazyParseSymbols() {
       _symbols.push_back(info);
     }
   }
+  return false;
 }
 
 
 uint32_t LTOModule::getSymbolCount() {
-  lazyParseSymbols();
   return _symbols.size();
 }
 
 
 lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) {
-  lazyParseSymbols();
   if (index < _symbols.size())
     return _symbols[index].attributes;
   else
@@ -544,7 +713,6 @@ lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) {
 }
 
 const char *LTOModule::getSymbolName(uint32_t index) {
-  lazyParseSymbols();
   if (index < _symbols.size())
     return _symbols[index].name;
   else
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index 1794d81..0b64a90 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -52,7 +52,12 @@ struct LTOModule {
     static LTOModule*        makeLTOModule(const char* path,
                                           std::string& errMsg);
     static LTOModule*        makeLTOModule(int fd, const char *path,
-                                           off_t size,
+                                           size_t size,
+                                           std::string& errMsg);
+    static LTOModule*        makeLTOModule(int fd, const char *path,
+                                           size_t file_size,
+                                           size_t map_size,
+                                           off_t offset,
                                            std::string& errMsg);
     static LTOModule*        makeLTOModule(const void* mem, size_t length,
                                            std::string& errMsg);
@@ -64,23 +69,27 @@ struct LTOModule {
     const char*              getSymbolName(uint32_t index);
     
     llvm::Module *           getLLVVMModule() { return _module.get(); }
+    const std::vector<const char*> &getAsmUndefinedRefs() {
+            return _asm_undefines;
+    }
 
 private:
                             LTOModule(llvm::Module* m, llvm::TargetMachine* t);
 
-    void                    lazyParseSymbols();
+    bool                    ParseSymbols();
     void                    addDefinedSymbol(llvm::GlobalValue* def, 
                                                     llvm::Mangler& mangler, 
                                                     bool isFunction);
     void                    addPotentialUndefinedSymbol(llvm::GlobalValue* decl, 
                                                         llvm::Mangler &mangler);
-    void                    findExternalRefs(llvm::Value* value, 
-                                                llvm::Mangler& mangler);
     void                    addDefinedFunctionSymbol(llvm::Function* f, 
                                                         llvm::Mangler &mangler);
     void                    addDefinedDataSymbol(llvm::GlobalValue* v, 
                                                         llvm::Mangler &mangler);
-    void                    addAsmGlobalSymbol(const char *);
+    bool                    addAsmGlobalSymbols(llvm::MCContext &Context);
+    void                    addAsmGlobalSymbol(const char *,
+                                               lto_symbol_attributes scope);
+    void                    addAsmGlobalSymbolUndef(const char *);
     void                    addObjCClass(llvm::GlobalVariable* clgv);
     void                    addObjCCategory(llvm::GlobalVariable* clgv);
     void                    addObjCClassRef(llvm::GlobalVariable* clgv);
@@ -103,11 +112,11 @@ private:
 
     llvm::OwningPtr<llvm::Module>           _module;
     llvm::OwningPtr<llvm::TargetMachine>    _target;
-    bool                                    _symbolsParsed;
     std::vector<NameAndAttributes>          _symbols;
     // _defines and _undefines only needed to disambiguate tentative definitions
     StringSet                               _defines;    
     llvm::StringMap<NameAndAttributes>      _undefines;
+    std::vector<const char*>                _asm_undefines;
 };
 
 #endif // LTO_MODULE_H
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index 294c81b..46925e7 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -20,7 +20,8 @@ include $(LEVEL)/Makefile.config
 LINK_LIBS_IN_SHARED = 1
 SHARED_LIBRARY = 1
 
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader bitwriter
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader \
+	bitwriter mcdisassembler
 
 include $(LEVEL)/Makefile.common
 
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index f48570c..dd658d1 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -95,12 +95,25 @@ lto_module_t lto_module_create(const char* path)
 // loads an object file from disk
 // returns NULL on error (check lto_get_error_message() for details)
 //
-lto_module_t lto_module_create_from_fd(int fd, const char *path, off_t size)
+lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size)
 {
      return LTOModule::makeLTOModule(fd, path, size, sLastErrorString);
 }
 
 //
+// loads an object file from disk
+// returns NULL on error (check lto_get_error_message() for details)
+//
+lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path,
+                                                 size_t file_size,
+                                                 size_t map_size,
+                                                 off_t offset)
+{
+     return LTOModule::makeLTOModule(fd, path, file_size, map_size,
+                                     offset, sLastErrorString);
+}
+
+//
 // loads an object file from memory 
 // returns NULL on error (check lto_get_error_message() for details)
 //
@@ -268,7 +281,7 @@ bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path)
 
 //
 // Generates code for all added modules into one native object file.
-// On sucess returns a pointer to a generated mach-o/ELF buffer and
+// On success returns a pointer to a generated mach-o/ELF buffer and
 // length set to the buffer size.  The buffer is owned by the 
 // lto_code_gen_t and will be freed when lto_codegen_dispose()
 // is called, or lto_codegen_compile() is called again.
@@ -280,6 +293,12 @@ lto_codegen_compile(lto_code_gen_t cg, size_t* length)
   return cg->compile(length, sLastErrorString);
 }
 
+extern bool
+lto_codegen_compile_to_file(lto_code_gen_t cg, const char **name)
+{
+  return cg->compile_to_file(name, sLastErrorString);
+}
+
 
 //
 // Used to pass extra options to the code generator
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index a374091..b900bfb 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -2,6 +2,7 @@ lto_get_error_message
 lto_get_version
 lto_module_create
 lto_module_create_from_fd
+lto_module_create_from_fd_at_offset
 lto_module_create_from_memory
 lto_module_get_num_symbols
 lto_module_get_symbol_attribute
@@ -25,3 +26,7 @@ lto_codegen_debug_options
 lto_codegen_set_assembler_args
 lto_codegen_set_assembler_path
 lto_codegen_set_cpu
+lto_codegen_compile_to_file
+LLVMCreateDisasm
+LLVMDisasmDispose
+LLVMDisasmInstruction
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index c4c558d..f324259 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -49,25 +49,6 @@ static void Warning(const Twine &Msg) {
 
 ///
 
-static int DumpHeader(MachOObject &Obj) {
-  // Read the header.
-  const macho::Header &Hdr = Obj.getHeader();
-  outs() << "('cputype', " << Hdr.CPUType << ")\n";
-  outs() << "('cpusubtype', " << Hdr.CPUSubtype << ")\n";
-  outs() << "('filetype', " << Hdr.FileType << ")\n";
-  outs() << "('num_load_commands', " << Hdr.NumLoadCommands << ")\n";
-  outs() << "('load_commands_size', " << Hdr.SizeOfLoadCommands << ")\n";
-  outs() << "('flag', " << Hdr.Flags << ")\n";
-
-  // Print extended header if 64-bit.
-  if (Obj.is64Bit()) {
-    const macho::Header64Ext &Hdr64 = Obj.getHeader64Ext();
-    outs() << "('reserved', " << Hdr64.Reserved << ")\n";
-  }
-
-  return 0;
-}
-
 static void DumpSegmentCommandData(StringRef Name,
                                    uint64_t VMAddr, uint64_t VMSize,
                                    uint64_t FileOffset, uint64_t FileSize,
@@ -376,8 +357,8 @@ int main(int argc, char **argv) {
   if (!InputObject)
     return Error("unable to load object: '" + ErrorStr + "'");
 
-  if (int Res = DumpHeader(*InputObject))
-    return Res;
+  // Print the header
+  InputObject->printHeader(outs());
 
   // Print the load commands.
   int Res = 0;
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index 791caf5..30361f5 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Support/ToolOutputFile.h"
 using namespace llvm;
 
@@ -103,13 +103,11 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
       AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();
 
     }
 
     virtual bool runOnFunction(Function &F) {
       getAnalysis<DominatorTree>().dump();
-      getAnalysis<DominanceFrontier>().dump();
       return false;
     }
   };
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index e55b4b3..25474c4 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/Signals.h"
@@ -132,11 +133,11 @@ static cl::opt<bool>
 AnalyzeOnly("analyze", cl::desc("Only perform analysis, no optimization"));
 
 static cl::opt<bool>
-PrintBreakpoints("print-breakpoints-for-testing", 
+PrintBreakpoints("print-breakpoints-for-testing",
                  cl::desc("Print select breakpoints location for testing"));
 
 static cl::opt<std::string>
-DefaultDataLayout("default-data-layout", 
+DefaultDataLayout("default-data-layout",
           cl::desc("data layout string to use if not specified by module"),
           cl::value_desc("layout-string"), cl::init(""));
 
@@ -327,7 +328,7 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
           << "': Pass " << PassToPrint->getPassName() << ":\n";
 
     // Get and print pass...
-    getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, 
+    getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
             BB.getParent()->getParent());
     return false;
   }
@@ -342,28 +343,43 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
 
 char BasicBlockPassPrinter::ID = 0;
 
-struct BreakpointPrinter : public FunctionPass {
+struct BreakpointPrinter : public ModulePass {
   raw_ostream &Out;
   static char ID;
 
   BreakpointPrinter(raw_ostream &out)
-    : FunctionPass(ID), Out(out) {
+    : ModulePass(ID), Out(out) {
     }
 
-  virtual bool runOnFunction(Function &F) {
-    BasicBlock &EntryBB = F.getEntryBlock();
-    BasicBlock::const_iterator BI = EntryBB.end();
-    --BI;
-    do {
-      const Instruction *In = BI;
-      const DebugLoc DL = In->getDebugLoc();
-      if (!DL.isUnknown()) {
-        DIScope S(DL.getScope(getGlobalContext()));
-        Out << S.getFilename() << " " << DL.getLine() << "\n";
-        break;
+  void getContextName(DIDescriptor Context, std::string &N) {
+    if (Context.isNameSpace()) {
+      DINameSpace NS(Context);
+      if (!NS.getName().empty()) {
+        getContextName(NS.getContext(), N);
+        N = N + NS.getName().str() + "::";
+      }
+    } else if (Context.isType()) {
+      DIType TY(Context);
+      if (!TY.getName().empty()) {
+        getContextName(TY.getContext(), N);
+        N = N + TY.getName().str() + "::";
+      }
+    }
+  }
+
+  virtual bool runOnModule(Module &M) {
+    StringSet<> Processed;
+    if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+      for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+        std::string Name;
+        DISubprogram SP(NMD->getOperand(i));
+        if (SP.Verify())
+          getContextName(SP.getContext(), Name);
+        Name = Name + SP.getDisplayName().str();
+        if (!Name.empty() && Processed.insert(Name)) {
+          Out << Name << "\n";
+        }
       }
-      --BI;
-    } while (BI != EntryBB.begin());
     return false;
   }
 
@@ -463,7 +479,7 @@ int main(int argc, char **argv) {
 
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
   LLVMContext &Context = getGlobalContext();
-  
+
   // Initialize passes
   PassRegistry &Registry = *PassRegistry::getPassRegistry();
   initializeCore(Registry);
@@ -475,7 +491,7 @@ int main(int argc, char **argv) {
   initializeInstCombine(Registry);
   initializeInstrumentation(Registry);
   initializeTarget(Registry);
-  
+
   cl::ParseCommandLineOptions(argc, argv,
     "llvm .bc -> .bc modular optimizer and analysis printer\n");
 
@@ -533,12 +549,12 @@ int main(int argc, char **argv) {
 
   // Add an appropriate TargetLibraryInfo pass for the module's triple.
   TargetLibraryInfo *TLI = new TargetLibraryInfo(Triple(M->getTargetTriple()));
-  
+
   // The -disable-simplify-libcalls flag actually disables all builtin optzns.
   if (DisableSimplifyLibCalls)
     TLI->disableAllFunctions();
   Passes.add(TLI);
-  
+
   // Add an appropriate TargetData instance for this module.
   TargetData *TD = 0;
   const std::string &ModuleDataLayout = M.get()->getDataLayout();
@@ -562,7 +578,7 @@ int main(int argc, char **argv) {
     if (!Out) {
       if (OutputFilename.empty())
         OutputFilename = "-";
-      
+
       std::string ErrorInfo;
       Out.reset(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
                                      raw_fd_ostream::F_Binary));
@@ -685,6 +701,9 @@ int main(int argc, char **argv) {
       Passes.add(createBitcodeWriterPass(Out->os()));
   }
 
+  // Before executing passes, print the final values of the LLVM options.
+  cl::PrintOptionValues();
+
   // Now that we have all of the passes ready, run them.
   Passes.run(*M.get());
 
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 964b04d..5f05b86 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -576,4 +576,25 @@ TEST(APFloatTest, StringHexadecimalExponentDeath) {
 #endif
 #endif
 
+TEST(APFloatTest, exactInverse) {
+  APFloat inv(0.0f);
+
+  // Trivial operation.
+  EXPECT_TRUE(APFloat(2.0).getExactInverse(&inv));
+  EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5)));
+  EXPECT_TRUE(APFloat(2.0f).getExactInverse(&inv));
+  EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5f)));
+
+  // FLT_MIN
+  EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
+  EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(8.5070592e+37f)));
+
+  // Large float, inverse is a denormal.
+  EXPECT_FALSE(APFloat(1.7014118e38f).getExactInverse(0));
+  // Zero
+  EXPECT_FALSE(APFloat(0.0).getExactInverse(0));
+  // Denormalized float
+  EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(0));
+}
+
 }
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index e05bdbf..dbd0cb7 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -348,6 +348,8 @@ TEST(APIntTest, magicu) {
   EXPECT_EQ(APInt(32, 5).magicu().s, 2U);
   EXPECT_EQ(APInt(32, 7).magicu().m, APInt(32, "24924925", 16));
   EXPECT_EQ(APInt(32, 7).magicu().s, 3U);
+  EXPECT_EQ(APInt(64, 25).magicu(1).m, APInt(64, "A3D70A3D70A3D70B", 16));
+  EXPECT_EQ(APInt(64, 25).magicu(1).s, 4U);
 }
 
 #ifdef GTEST_HAS_DEATH_TEST
diff --git a/unittests/Support/MathExtrasTest.cpp b/unittests/Support/MathExtrasTest.cpp
index 3db1f77..0a6724c 100644
--- a/unittests/Support/MathExtrasTest.cpp
+++ b/unittests/Support/MathExtrasTest.cpp
@@ -73,7 +73,7 @@ TEST(MathExtras, CountLeadingOnes_64) {
 }
 
 TEST(MathExtras, FloatBits) {
-  static const float kValue = 5632.34;
+  static const float kValue = 5632.34f;
   EXPECT_FLOAT_EQ(kValue, BitsToFloat(FloatToBits(kValue)));
 }
 
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index b65ac34..1ce549d1 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -9,6 +9,7 @@
 
 #include "gtest/gtest.h"
 #include "llvm/Argument.h"
+#include "llvm/Constant.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp
index e0322b3..3026b4b 100644
--- a/unittests/Transforms/Utils/Local.cpp
+++ b/unittests/Transforms/Utils/Local.cpp
@@ -26,7 +26,7 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) {
   BasicBlock *bb1 = BasicBlock::Create(C);
 
   builder.SetInsertPoint(bb0);
-  PHINode    *phi = builder.CreatePHI(Type::getInt32Ty(C));
+  PHINode    *phi = builder.CreatePHI(Type::getInt32Ty(C), 2);
   BranchInst *br0 = builder.CreateCondBr(builder.getTrue(), bb0, bb1);
 
   builder.SetInsertPoint(bb1);
@@ -43,12 +43,12 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) {
   EXPECT_EQ(&bb1->front(), br1);
 
   builder.SetInsertPoint(bb0);
-  phi = builder.CreatePHI(Type::getInt32Ty(C));
+  phi = builder.CreatePHI(Type::getInt32Ty(C), 0);
 
   EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi));
 
   builder.SetInsertPoint(bb0);
-  phi = builder.CreatePHI(Type::getInt32Ty(C));
+  phi = builder.CreatePHI(Type::getInt32Ty(C), 0);
   builder.CreateAdd(phi, phi);
 
   EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi));
diff --git a/unittests/VMCore/DerivedTypesTest.cpp b/unittests/VMCore/DerivedTypesTest.cpp
index 9dea6ff..9562157 100644
--- a/unittests/VMCore/DerivedTypesTest.cpp
+++ b/unittests/VMCore/DerivedTypesTest.cpp
@@ -19,13 +19,13 @@ namespace {
 
 static void PR7658() {
   LLVMContext ctx;
-  
+
   WeakVH NullPtr;
   PATypeHolder h1;
   {
     OpaqueType *o1 = OpaqueType::get(ctx);
     PointerType *p1 = PointerType::get(o1, 0);
-    
+
     std::vector<const Type *> t1;
     t1.push_back(IntegerType::get(ctx, 32));
     t1.push_back(p1);
@@ -33,41 +33,41 @@ static void PR7658() {
     OpaqueType *o2 = OpaqueType::get (ctx);
     PointerType *p2 = PointerType::get (o2, 0);
     t1.push_back(p2);
-    
-    
+
+
     StructType *s1 = StructType::get(ctx, t1);
     h1 = s1;
     o1->refineAbstractTypeTo(s1);
     o2->refineAbstractTypeTo(h1.get());  // h1 = { i32, \2*, \2* }
   }
-  
-  
+
+
   OpaqueType *o3 = OpaqueType::get(ctx);
   PointerType *p3 = PointerType::get(o3, 0);  // p3 = opaque*
-  
+
   std::vector<const Type *> t2;
   t2.push_back(IntegerType::get(ctx, 32));
   t2.push_back(p3);
-  
+
   std::vector<Constant *> v2;
   v2.push_back(ConstantInt::get(IntegerType::get(ctx, 32), 14));
   v2.push_back(ConstantPointerNull::get(p3));
-  
+
   OpaqueType *o4 = OpaqueType::get(ctx);
   {
     PointerType *p4 = PointerType::get(o4, 0);
     t2.push_back(p4);
     v2.push_back(ConstantPointerNull::get(p4));
   }
-  
+
   WeakVH CS = ConstantStruct::get(ctx, v2, false); // { i32 14, opaque* null, opaque* null}
-  
+
   StructType *s2 = StructType::get(ctx, t2);
   PATypeHolder h2(s2);
   o3->refineAbstractTypeTo(s2);
   o4->refineAbstractTypeTo(h2.get());
 }
-  
+
 
 TEST(OpaqueTypeTest, RegisterWithContext) {
   LLVMContext C;
@@ -81,7 +81,7 @@ TEST(OpaqueTypeTest, RegisterWithContext) {
     EXPECT_EQ(2u, pImpl->OpaqueTypes.size());
   }
   EXPECT_EQ(1u, pImpl->OpaqueTypes.size());
-  
+
   PR7658();
 }
 
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index 1d1127d..8edcce4 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Instructions.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/ADT/STLExtras.h"
@@ -107,5 +108,18 @@ TEST(InstructionsTest, BranchInst) {
   delete bb1;
 }
 
+TEST(InstructionsTest, CastInst) {
+  LLVMContext &C(getGlobalContext());
+
+  const Type* Int8Ty = Type::getInt8Ty(C);
+  const Type* Int64Ty = Type::getInt64Ty(C);
+  const Type* V8x8Ty = VectorType::get(Int8Ty, 8);
+  const Type* X86MMXTy = Type::getX86_MMXTy(C);
+
+  EXPECT_TRUE(CastInst::isCastable(V8x8Ty, X86MMXTy));
+  EXPECT_TRUE(CastInst::isCastable(X86MMXTy, V8x8Ty));
+  EXPECT_FALSE(CastInst::isCastable(Int64Ty, X86MMXTy));
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index 942b848..0b2c012 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -87,10 +87,10 @@ TEST_F(MDNodeTest, Simple) {
   V.push_back(CI);
   V.push_back(s2);
 
-  MDNode *n1 = MDNode::get(Context, &V[0], 3);
+  MDNode *n1 = MDNode::get(Context, V);
   Value *const c1 = n1;
-  MDNode *n2 = MDNode::get(Context, &c1, 1);
-  MDNode *n3 = MDNode::get(Context, &V[0], 3);
+  MDNode *n2 = MDNode::get(Context, c1);
+  MDNode *n3 = MDNode::get(Context, V);
   EXPECT_NE(n1, n2);
 #ifdef ENABLE_MDNODE_UNIQUING
   EXPECT_EQ(n1, n3);
@@ -112,7 +112,7 @@ TEST_F(MDNodeTest, Delete) {
   Instruction *I = new BitCastInst(C, Type::getInt32Ty(getGlobalContext()));
 
   Value *const V = I;
-  MDNode *n = MDNode::get(Context, &V, 1);
+  MDNode *n = MDNode::get(Context, V);
   WeakVH wvh = n;
 
   EXPECT_EQ(n, wvh);
@@ -127,8 +127,8 @@ TEST(NamedMDNodeTest, Search) {
 
   Value *const V = C;
   Value *const V2 = C2;
-  MDNode *n = MDNode::get(Context, &V, 1);
-  MDNode *n2 = MDNode::get(Context, &V2, 1);
+  MDNode *n = MDNode::get(Context, V);
+  MDNode *n2 = MDNode::get(Context, V2);
 
   Module M("MyModule", Context);
   const char *Name = "llvm.NMD1";
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 0073751..2f2a200 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -40,7 +40,7 @@ namespace llvm {
   void initializeCGPassPass(PassRegistry&);
   void initializeLPassPass(PassRegistry&);
   void initializeBPassPass(PassRegistry&);
-  
+
   namespace {
     // ND = no deps
     // NM = no modifications
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
index 152e8ea..b493920 100644
--- a/unittests/VMCore/ValueMapTest.cpp
+++ b/unittests/VMCore/ValueMapTest.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/ADT/OwningPtr.h"
diff --git a/utils/CollectDebugInfoUsingLLDB.py b/utils/CollectDebugInfoUsingLLDB.py
deleted file mode 100755
index 4dbd19a..0000000
--- a/utils/CollectDebugInfoUsingLLDB.py
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/usr/bin/python
-
-#----------------------------------------------------------------------
-# 
-# Be sure to add the python path that points to the LLDB shared library.
-# On MacOSX csh, tcsh:
-#   setenv PYTHONPATH /Developer/Library/PrivateFrameworks/LLDB.framework/Resources/Python
-# On MacOSX sh, bash:
-#   export PYTHONPATH=/Developer/Library/PrivateFrameworks/LLDB.framework/Resources/Python
-#
-# This script collect debugging information using LLDB. This script is
-# used by TEST=dbg in llvm testsuite to measure quality of debug info in
-# optimized builds.
-#
-# Usage:
-# export PYTHONPATH=...
-# ./CollectDebugInfUsingLLDB.py program bp_file out_file
-#     program - Executable program with debug info.
-#     bp_file - Simple text file listing breakpoints.
-#               <absolute file name> <line number>
-#     out_file - Output file where the debug info will be emitted.
-#----------------------------------------------------------------------
-
-import lldb
-import os
-import sys
-import time
-
-# AlreadyPrintedValues - A place to keep track of recursive values.
-AlreadyPrintedValues = {}
-
-# ISAlreadyPrinted - Return true if value is already printed.
-def IsAlreadyPrinted(value_name):
-        if AlreadyPrintedValues.get(value_name) is None:
-                AlreadyPrintedValues[value_name] = 1
-                return False
-        return True
-
-
-# print_var_value - Print a variable's value.
-def print_var_value (v, file, frame):
-        if v.IsValid() == False:
-                return
-        if IsAlreadyPrinted(v.GetName()):
-                return
-        total_children = v.GetNumChildren()
-        if total_children > 0:
-            c = 0
-            while (c < total_children) :
-                    child = v.GetChildAtIndex(c)
-                    if child is None:
-                        file.write("None")
-                    else:
-                        if (child.GetName()) is None:
-                                file.write("None")
-                        else:
-                                file.write(child.GetName())
-                                file.write('=')
-                                print_var_value(child, file, frame)
-                                file.write(',')
-                    c = c + 1
-        else:
-            if v.GetValue(frame) is None:
-                file.write("None")
-            else:
-                file.write(v.GetValue(frame))
-
-# print_vars - Print variable values in output file.
-def print_vars (tag, vars, fname, line, file, frame, target, thread):
-    # disable this thread.
-    count = thread.GetStopReasonDataCount()
-    bid = 0
-    tid = 0
-    for i in range(count):
-        id = thread.GetStopReasonDataAtIndex(i)
-        bp = target.FindBreakpointByID(id)
-        if bp.IsValid():
-            if bp.IsEnabled() == True:
-                    bid = bp.GetID()
-                    tid = bp.GetThreadID()
-                    bp.SetEnabled(False)
-        else:
-            bp_loc = bp.FindLocationByID(thread.GetStopReasonDataAtIndex(i+1))
-            if bp_loc.IsValid():
-                bid = bp_loc.GetBreakPoint().GetID()
-                tid = bp_loc.ThreadGetID()
-                bp_loc.SetEnabled(False);
-
-    for i in range(vars.GetSize()):
-            v = vars.GetValueAtIndex(i)
-            if v.GetName() is not None:
-                    file.write(tag)
-                    file.write(fname)
-                    file.write(':')
-                    file.write(str(line))
-                    file.write(' ')
-                    file.write(str(tid))
-                    file.write(':')
-                    file.write(str(bid))
-                    file.write(' ')
-                    file.write(v.GetName())
-                    file.write(' ')
-                    AlreadyPrintedValues.clear()
-                    print_var_value (v, file, frame)
-                    file.write('\n')
-
-# set_breakpoints - set breakpoints as listed in input file.
-def set_breakpoints (target, breakpoint_filename, file):
-    f = open(breakpoint_filename, "r")
-    lines = f.readlines()
-    for l in range(len(lines)):
-        c = lines[l].split()
-        # print "setting break point - ", c
-        bp = target.BreakpointCreateByLocation (str(c[0]), int(c[1]))
-        file.write("#Breakpoint ")
-        file.write(str(c[0]))
-        file.write(':')
-        file.write(str(c[1]))
-        file.write(' ')
-        file.write(str(bp.GetThreadID()))
-        file.write(':')
-        file.write(str(bp.GetID()))
-        file.write('\n')
-    f.close()
-
-# stopeed_at_breakpoint - Return True if process is stopeed at a
-# breakpoint.
-def stopped_at_breakpoint (process):
-    if process.IsValid():
-        state = process.GetState()
-        if state == lldb.eStateStopped:
-                thread = process.GetThreadAtIndex(0)
-                if thread.IsValid():
-                        if thread.GetStopReason() == lldb.eStopReasonBreakpoint:
-                                return True
-    return False
-
-# Create a new debugger instance
-debugger = lldb.SBDebugger.Create()
-
-# When we step or continue, don't return from the function until the process 
-# stops. We do this by setting the async mode to false.
-debugger.SetAsync (False)
-
-# Create a target from a file and arch
-##print "Creating a target for '%s'" % sys.argv[1]
-
-target = debugger.CreateTargetWithFileAndArch (sys.argv[1], lldb.LLDB_ARCH_DEFAULT)
-
-if target.IsValid():
-    #print "target is valid"
-    file=open(str(sys.argv[3]), 'w')    
-    set_breakpoints (target, sys.argv[2], file)
-
-    # Launch the process. Since we specified synchronous mode, we won't return
-    # from this function until we hit the breakpoint at main
-    sberror = lldb.SBError()
-    process = target.Launch (None, None, os.ctermid(), os.ctermid(), os.ctermid(), None, 0, False, sberror)
-    # Make sure the launch went ok
-    while stopped_at_breakpoint(process):
-        thread = process.GetThreadAtIndex (0)
-        frame = thread.GetFrameAtIndex (0)
-        if frame.IsValid():
-            # #Print some simple frame info
-            ##print frame
-            #print "frame is valid"
-            function = frame.GetFunction()
-            if function.IsValid():
-                fname = function.GetMangledName()
-                if fname is None:
-                    fname = function.GetName()
-                #print "function : ",fname
-                line = frame.GetLineEntry().GetLine()
-                vars = frame.GetVariables(1,0,0,0)
-                print_vars ("#Argument ", vars, fname, line, file, frame, target, thread)
-                # vars = frame.GetVariables(0,1,0,0)
-                # print_vars ("#Variables ", vars, fname, line, file, frame, target, thread)
-
-        process.Continue()
-    file.close()
-
-lldb.SBDebugger.Terminate()
diff --git a/utils/CompareDebugInfo.py b/utils/CompareDebugInfo.py
deleted file mode 100755
index 2cd647e..0000000
--- a/utils/CompareDebugInfo.py
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/usr/bin/python
-
-import os
-import sys
-
-DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".dbg.out"
-OPT_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".dbg.opt.out"
-LOG_FILE="Output/" + sys.argv[1] + ".log"
-NATIVE_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".native.dbg.out"
-NATIVE_OPT_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".native.dbg.opt.out"
-NATIVE_LOG_FILE="Output/" + sys.argv[1] + ".native.log"
-REPORT_FILE="Output/" + sys.argv[1] + ".dbg.report.html"
-
-class BreakPoint:
-    def __init__(self, bp_name):
-        self.name = bp_name
-        self.values = {}
-        self.missing_args = []
-        self.matching_args = []
-        self.notmatching_args = []
-        self.missing_bp = False
-
-    def setMissing(self):
-        self.missing_bp = True
-
-    def getArgCount(self):
-        return len(self.values)
-
-    def getMissingArgCount(self):
-        if self.missing_bp == True:
-            return len(self.values)
-        return len(self.missing_args)
-
-    def getMatchingArgCount(self):
-        if self.missing_bp == True:
-            return 0
-        return len(self.matching_args)
-
-    def getNotMatchingArgCount(self):
-        if self.missing_bp == True:
-            return 0
-        return len(self.notmatching_args)
-
-    def recordArgument(self, arg_name, value):
-        self.values[arg_name] = value
-        
-    def __repr__(self):
-        print self.name
-        items = self.values.items()
-        for i in range(len(items)):
-            print items[i][0]," = ",items[i][1]
-        return ''
-
-    def compare_args(self, other, file):
-        myitems = self.values.items()
-        otheritems = other.values.items()
-        match = False
-        for i in range(len(myitems)):
-            if i >= len(otheritems):
-                match = True
-                self.missing_args.append(myitems[i][0])
-            elif cmp(myitems[i][1], otheritems[i][1]):
-                match = True
-                self.notmatching_args.append(myitems[i][0])
-            else:
-                self.matching_args.append(myitems[i][0])
-
-        self.print_list(self.matching_args, " Matching arguments ", file)
-        self.print_list(self.notmatching_args, " Not Matching arguments ", file)
-        self.print_list(self.missing_args, " Missing arguments ", file)
-        return match
-
-    def print_list(self, items, txt, pfile):
-        if len(items) == 0:
-            return
-        pfile.write(self.name)
-        pfile.write(txt)
-        for e in items:
-            pfile.write(e)
-            pfile.write(' ')
-        pfile.write('\n')
-
-def read_input(filename, dict):
-    f = open(filename, "r")
-    lines = f.readlines()
-    for l in range(len(lines)):
-        c = lines[l].split()
-        if c[0] == "#Breakpoint":
-            bp = dict.get(c[2])
-            if bp is None:
-                bp = BreakPoint(c[1])
-            dict[c[2]] = bp
-        if c[0] == "#Argument":
-            bp = dict.get(c[2])
-            if bp is None:
-                bp = BreakPoint(c[1])
-            dict[c[2]] = bp
-            bp.recordArgument(c[3], c[4])
-    return
-
-f1_breakpoints = {}
-read_input(DBG_OUTPUT_FILE, f1_breakpoints)
-f1_items = f1_breakpoints.items()
-
-f2_breakpoints = {}
-read_input(OPT_DBG_OUTPUT_FILE, f2_breakpoints)
-f2_items = f2_breakpoints.items()
-    
-f = open(LOG_FILE, "w")
-f.write("Log output\n")
-for f2bp in range(len(f2_items)):
-    id = f2_items[f2bp][0]
-    bp = f2_items[f2bp][1]
-    bp1 = f1_breakpoints.get(id)
-    if bp1 is None:
-        bp.setMissing()
-    else:
-        bp1.compare_args(bp,f)
-f.close()
-
-nf1_breakpoints = {}
-read_input(NATIVE_DBG_OUTPUT_FILE, nf1_breakpoints)
-nf1_items = nf1_breakpoints.items()
-
-nf2_breakpoints = {}
-read_input(NATIVE_OPT_DBG_OUTPUT_FILE, nf2_breakpoints)
-nf2_items = nf2_breakpoints.items()
-    
-nfl = open(NATIVE_LOG_FILE, "w")
-for nf2bp in range(len(nf2_items)):
-    id = nf2_items[nf2bp][0]
-    bp = nf2_items[nf2bp][1]
-    bp1 = nf1_breakpoints.get(id)
-    if bp1 is None:
-        bp.setMissing()
-    else:
-        bp1.compare_args(bp,nfl)
-nfl.close()
-
-f1_arg_count = 0
-f1_matching_arg_count = 0
-f1_notmatching_arg_count = 0
-f1_missing_arg_count = 0
-for idx in range(len(f1_items)):
-    bp = f1_items[idx][1]
-    f1_arg_count = f1_arg_count + bp.getArgCount()
-    f1_matching_arg_count = f1_matching_arg_count + bp.getMatchingArgCount()
-    f1_notmatching_arg_count = f1_notmatching_arg_count + bp.getNotMatchingArgCount()
-    f1_missing_arg_count = f1_missing_arg_count + bp.getMissingArgCount()
-
-nf1_arg_count = 0
-nf1_matching_arg_count = 0
-nf1_notmatching_arg_count = 0
-nf1_missing_arg_count = 0
-for idx in range(len(nf1_items)):
-    bp = nf1_items[idx][1]
-    nf1_arg_count = nf1_arg_count + bp.getArgCount()
-    nf1_matching_arg_count = nf1_matching_arg_count + bp.getMatchingArgCount()
-    nf1_notmatching_arg_count = nf1_notmatching_arg_count + bp.getNotMatchingArgCount()
-    nf1_missing_arg_count = nf1_missing_arg_count + bp.getMissingArgCount()
-
-rf = open(REPORT_FILE, "w")
-rf.write("<tr><td>")
-rf.write(str(sys.argv[1]))
-rf.write("</td><td>|</td><td>")
-rf.write(str(nf1_arg_count))
-rf.write("</td><td><b>")
-rf.write(str(nf1_matching_arg_count))
-rf.write("</b></td><td>")
-rf.write(str(nf1_notmatching_arg_count))
-rf.write("</td><td>")
-rf.write(str(nf1_missing_arg_count))
-rf.write("</td><td>|</td><td>")
-rf.write(str(f1_arg_count))
-rf.write("</td><td><b>")
-rf.write(str(f1_matching_arg_count))
-rf.write("</b></td><td>")
-rf.write(str(f1_notmatching_arg_count))
-rf.write("</td><td>")
-rf.write(str(f1_missing_arg_count))
-rf.write("\n")
-rf.close()
diff --git a/utils/DSAextract.py b/utils/DSAextract.py
index 134e945..89dece1 100644
--- a/utils/DSAextract.py
+++ b/utils/DSAextract.py
@@ -58,7 +58,7 @@ node_set = set()
 #read the file one line at a time
 buffer = input.readline()
 while buffer != '':
-	#filter out the unecessary checks on all the edge lines
+	#filter out the unnecessary checks on all the edge lines
 	if not arrowexp.search(buffer):
 		#check to see if this is a node we are looking for
 		for regexp in regexp_list:
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 5d4cb0c..f225594 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -131,26 +131,34 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
   }
 
   // Paren value #0 is for the fully matched string.  Any new parenthesized
-  // values add from their.
+  // values add from there.
   unsigned CurParen = 1;
 
   // Otherwise, there is at least one regex piece.  Build up the regex pattern
   // by escaping scary characters in fixed strings, building up one big regex.
   while (!PatternStr.empty()) {
     // RegEx matches.
-    if (PatternStr.size() >= 2 &&
-        PatternStr[0] == '{' && PatternStr[1] == '{') {
+    if (PatternStr.startswith("{{")) {
 
       // Otherwise, this is the start of a regex match.  Scan for the }}.
       size_t End = PatternStr.find("}}");
       if (End == StringRef::npos) {
         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
-                        "found start of regex string with no end '}}'", "error");
+                        "found start of regex string with no end '}}'","error");
         return true;
       }
 
+      // Enclose {{}} patterns in parens just like [[]] even though we're not
+      // capturing the result for any purpose.  This is required in case the
+      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
+      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
+      RegExStr += '(';
+      ++CurParen;
+
       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
         return true;
+      RegExStr += ')';
+
       PatternStr = PatternStr.substr(End+2);
       continue;
     }
@@ -160,8 +168,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
     // second form is [[foo]] which is a reference to foo.  The variable name
     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
     // it.  This is to catch some common errors.
-    if (PatternStr.size() >= 2 &&
-        PatternStr[0] == '[' && PatternStr[1] == '[') {
+    if (PatternStr.startswith("[[")) {
       // Verify that it is terminated properly.
       size_t End = PatternStr.find("]]");
       if (End == StringRef::npos) {
@@ -185,10 +192,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
 
       // Verify that the name is well formed.
       for (unsigned i = 0, e = Name.size(); i != e; ++i)
-        if (Name[i] != '_' &&
-            (Name[i] < 'a' || Name[i] > 'z') &&
-            (Name[i] < 'A' || Name[i] > 'Z') &&
-            (Name[i] < '0' || Name[i] > '9')) {
+        if (Name[i] != '_' && !isalnum(Name[i])) {
           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
                           "invalid name in named regex", "error");
           return true;
diff --git a/utils/GenLibDeps.pl b/utils/GenLibDeps.pl
index ca852ad..0cd9e6a 100755
--- a/utils/GenLibDeps.pl
+++ b/utils/GenLibDeps.pl
@@ -202,7 +202,7 @@ sub gen_one_entry {
     print "$lib:";
     if ($WHY) { print "\n"; }
   } else {
-    print "  <dt><b>$lib</b</dt><dd><ul>\n";
+    print "  <dt><b>$lib</b></dt><dd><ul>\n";
   }
   open UNDEFS, 
     "$nmPath -u $Directory/$lib | sed -e 's/^[ 0]* U //' | sort | uniq |";
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index 7a89dd3..1ddae0b 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -169,14 +169,14 @@ namespace {
 static error_code GetFileNameFromHandle(HANDLE FileHandle,
                                         std::string& Name) {
   char Filename[MAX_PATH+1];
-  bool Sucess = false;
+  bool Success = false;
   Name.clear();
 
   // Get the file size.
   LARGE_INTEGER FileSize;
-  Sucess = ::GetFileSizeEx(FileHandle, &FileSize);
+  Success = ::GetFileSizeEx(FileHandle, &FileSize);
 
-  if (!Sucess)
+  if (!Success)
     return windows_error(::GetLastError());
 
   // Create a file mapping object.
@@ -198,12 +198,12 @@ static error_code GetFileNameFromHandle(HANDLE FileHandle,
   if (!MappedFile)
     return windows_error(::GetLastError());
 
-  Sucess = ::GetMappedFileNameA(::GetCurrentProcess(),
+  Success = ::GetMappedFileNameA(::GetCurrentProcess(),
                                 MappedFile,
                                 Filename,
                                 array_lengthof(Filename) - 1);
 
-  if (!Sucess)
+  if (!Success)
     return windows_error(::GetLastError());
   else {
     Name = Filename;
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index 1b48168..da806e9 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -794,7 +794,7 @@ my %hash_of_data = (
   'endtime' => $endtime,
   'target_triple' => $targetTriple,
 
-  # Unused, but left around for backwards compatability.
+  # Unused, but left around for backwards compatibility.
   'warnings' => "",
   'cvsusercommitlist' => "",
   'cvsuserupdatelist' => "",
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
index a8de745..62bd1c6 100644
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -607,7 +607,7 @@ void ARMFilter::recurse() {
     for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
       BitValueArray[StartBit + bitIndex] = BIT_UNSET;
 
-    // Delegates to an inferior filter chooser for futher processing on this
+    // Delegates to an inferior filter chooser for further processing on this
     // group of instructions whose segment values are variable.
     FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
                               (unsigned)-1,
@@ -639,7 +639,7 @@ void ARMFilter::recurse() {
         BitValueArray[StartBit + bitIndex] = BIT_FALSE;
     }
 
-    // Delegates to an inferior filter chooser for futher processing on this
+    // Delegates to an inferior filter chooser for further processing on this
     // category of instructions.
     FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
                               mapIterator->first,
@@ -1382,7 +1382,7 @@ bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
     // 2. source registers are identical => VMOVQ; otherwise => VORRq
     // 3. LDR, LDRcp => return LDR for now.
     // FIXME: How can we distinguish between LDR and LDRcp?  Do we need to?
-    // 4. tLDM, tLDM_UPD => Rn = Inst{10-8}, reglist = Inst{7-0},
+    // 4. tLDMIA, tLDMIA_UPD => Rn = Inst{10-8}, reglist = Inst{7-0},
     //    wback = registers<Rn> = 0
     // NOTE: (tLDM, tLDM_UPD) resolution must come before Advanced SIMD
     //       addressing mode resolution!!!
@@ -1423,7 +1423,7 @@ bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
         << "; // Returning LDR for {LDR, LDRcp}\n";
       return true;
     }
-    if (name1 == "tLDM" && name2 == "tLDM_UPD") {
+    if (name1 == "tLDMIA" && name2 == "tLDMIA_UPD") {
       // Inserting the opening curly brace for this case block.
       --Indentation; --Indentation;
       o.indent(Indentation) << "{\n";
@@ -1570,9 +1570,7 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
     return false;
 
   if (TN == TARGET_ARM) {
-    // FIXME: what about Int_MemBarrierV6 and Int_SyncBarrierV6?
-    if ((Name != "Int_MemBarrierV7" && Name != "Int_SyncBarrierV7") &&
-        Form == ARM_FORMAT_PSEUDO)
+    if (Form == ARM_FORMAT_PSEUDO)
       return false;
     if (thumbInstruction(Form))
       return false;
@@ -1586,61 +1584,18 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
         Name == "MOVr_TC")
       return false;
 
+    // Delegate ADR disassembly to the more generic ADDri/SUBri instructions.
+    if (Name == "ADR")
+      return false;
+
     //
     // The following special cases are for conflict resolutions.
     //
 
-    // NEON NLdStFrm conflict resolutions:
-    //
-    // 1. Ignore suffix "odd" and "odd_UPD", prefer the "even" register-
-    //    numbered ones which have the same Asm format string.
-    // 2. Ignore VST2d64_UPD, which conflicts with VST1q64_UPD.
-    // 3. Ignore VLD2d64_UPD, which conflicts with VLD1q64_UPD.
-    // 4. Ignore VLD1q[_UPD], which conflicts with VLD1q64[_UPD].
-    // 5. Ignore VST1q[_UPD], which conflicts with VST1q64[_UPD].
-    if (Name.endswith("odd") || Name.endswith("odd_UPD") ||
-        Name == "VST2d64_UPD" || Name == "VLD2d64_UPD" ||
-        Name == "VLD1q" || Name == "VLD1q_UPD" ||
-        Name == "VST1q" || Name == "VST1q_UPD")
-      return false;
-
     // RSCSri and RSCSrs set the 's' bit, but are not predicated.  We are
     // better off using the generic RSCri and RSCrs instructions.
     if (Name == "RSCSri" || Name == "RSCSrs") return false;
 
-    // MOVCCr, MOVCCs, MOVCCi, MOVCCi16, FCYPScc, FCYPDcc, FNEGScc, and
-    // FNEGDcc are used in the compiler to implement conditional moves.
-    // We can ignore them in favor of their more generic versions of
-    // instructions. See also SDNode *ARMDAGToDAGISel::Select(SDValue Op).
-    if (Name == "MOVCCr"   || Name == "MOVCCs"  || Name == "MOVCCi" ||
-        Name == "MOVCCi16" || Name == "FCPYScc" || Name == "FCPYDcc" ||
-        Name == "FNEGScc"  || Name == "FNEGDcc")
-      return false;
-
-    // Ditto for VMOVDcc, VMOVScc, VNEGDcc, and VNEGScc.
-    if (Name == "VMOVDcc" || Name == "VMOVScc" || Name == "VNEGDcc" ||
-        Name == "VNEGScc")
-      return false;
-
-    // LDMIA_RET is a special case of LDM (Load Multiple) where the registers
-    // loaded include the PC, causing a branch to a loaded address.  Ignore
-    // the LDMIA_RET instruction when decoding.
-    if (Name == "LDMIA_RET") return false;
-
-    // Bcc is in a more generic form than B.  Ignore B when decoding.
-    if (Name == "B") return false;
-
-    // Ignore the non-Darwin BL instructions and the TPsoft (TLS) instruction.
-    if (Name == "BL" || Name == "BL_pred" || Name == "BLX" || Name == "BX" ||
-        Name == "TPsoft")
-      return false;
-
-    // Ignore VDUPf[d|q] instructions known to conflict with VDUP32[d-q] for
-    // decoding.  The instruction duplicates an element from an ARM core
-    // register into every element of the destination vector.  There is no
-    // distinction between data types.
-    if (Name == "VDUPfd" || Name == "VDUPfq") return false;
-
     // A8-598: VEXT
     // Vector Extract extracts elements from the bottom end of the second
     // operand vector and the top end of the first, concatenates them and
@@ -1656,34 +1611,23 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
     if (Name == "VEXTd16" || Name == "VEXTd32" || Name == "VEXTdf" ||
         Name == "VEXTq16" || Name == "VEXTq32" || Name == "VEXTqf")
       return false;
-
-    // Vector Reverse is similar to Vector Extract.  There is no distinction
-    // between data types, other than size.
-    //
-    // VREV64df is equivalent to VREV64d32.
-    // VREV64qf is equivalent to VREV64q32.
-    if (Name == "VREV64df" || Name == "VREV64qf") return false;
-
-    // VDUPLNfd is equivalent to VDUPLN32d.
-    // VDUPLNfq is equivalent to VDUPLN32q.
-    // VLD1df is equivalent to VLD1d32.
-    // VLD1qf is equivalent to VLD1q32.
-    // VLD2d64 is equivalent to VLD1q64.
-    // VST1df is equivalent to VST1d32.
-    // VST1qf is equivalent to VST1q32.
-    // VST2d64 is equivalent to VST1q64.
-    if (Name == "VDUPLNfd" || Name == "VDUPLNfq" ||
-        Name == "VLD1df"   || Name == "VLD1qf"   || Name == "VLD2d64" ||
-        Name == "VST1df"   || Name == "VST1qf"   || Name == "VST2d64")
-      return false;
   } else if (TN == TARGET_THUMB) {
     if (!thumbInstruction(Form))
       return false;
 
+    // A8.6.189 STM / STMIA / STMEA -- Encoding T1
+    // There's only STMIA_UPD for Thumb1.
+    if (Name == "tSTMIA")
+      return false;
+
     // On Darwin R9 is call-clobbered.  Ignore the non-Darwin counterparts.
     if (Name == "tBL" || Name == "tBLXi" || Name == "tBLXr")
       return false;
 
+    // A8.6.25 BX.  Use the generic tBX_Rm, ignore tBX_RET and tBX_RET_vararg.
+    if (Name == "tBX_RET" || Name == "tBX_RET_vararg")
+      return false;
+
     // Ignore the TPsoft (TLS) instructions, which conflict with tBLr9.
     if (Name == "tTPsoft" || Name == "t2TPsoft")
       return false;
@@ -1692,6 +1636,11 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
     if (Name == "tADR")
       return false;
 
+    // Delegate t2ADR disassembly to the more generic t2ADDri12/t2SUBri12
+    // instructions.
+    if (Name == "t2ADR")
+      return false;
+
     // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
     // Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s].
     // Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s].
@@ -1703,6 +1652,11 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
         Name == "t2ADDrSPi12" || Name == "t2SUBrSPi12")
       return false;
 
+    // FIXME: Use ldr.n to work around a Darwin assembler bug.
+    // Introduce a workaround with tLDRpciDIS opcode.
+    if (Name == "tLDRpci")
+      return false;
+
     // Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST.
     if (Name == "t2LDRDpci")
       return false;
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index e3def41..1d14037 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -88,7 +88,7 @@
 //   2. The operand matcher will try every possible entry with the same
 //      mnemonic and will check if the target feature for this mnemonic also
 //      matches. After that, if the operand to be matched has its index
-//      present in the mask, a successfull match occurs. Otherwise, fallback
+//      present in the mask, a successful match occurs. Otherwise, fallback
 //      to the regular operand parsing.
 //
 //   3. For a match success, each operand class that has a 'ParserMethod'
@@ -258,7 +258,7 @@ public:
       return ValueName < RHS.ValueName;
 
     default:
-      // This class preceeds the RHS if it is a proper subset of the RHS.
+      // This class precedes the RHS if it is a proper subset of the RHS.
       if (isSubsetOf(RHS))
         return true;
       if (RHS.isSubsetOf(*this))
@@ -1265,7 +1265,7 @@ void AsmMatcherInfo::BuildInfo() {
       II->BuildAliasResultOperands();
   }
 
-  // Reorder classes so that classes preceed super classes.
+  // Reorder classes so that classes precede super classes.
   std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
 }
 
@@ -1483,10 +1483,10 @@ static void EmitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName,
     MatchableInfo &II = **it;
 
     // Check if we have a custom match function.
-    StringRef AsmMatchConverter = II.getResultInst()->TheDef->getValueAsString(
-      "AsmMatchConverter");
+    std::string AsmMatchConverter =
+      II.getResultInst()->TheDef->getValueAsString("AsmMatchConverter");
     if (!AsmMatchConverter.empty()) {
-      std::string Signature = "ConvertCustom_" + AsmMatchConverter.str();
+      std::string Signature = "ConvertCustom_" + AsmMatchConverter;
       II.ConversionFnKind = Signature;
 
       // Check if we have already generated this signature.
@@ -1538,7 +1538,7 @@ static void EmitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName,
         // operand from the earlier one.We can only tie single MCOperand values.
         //assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
         unsigned TiedOp = OpInfo.TiedOperandNum;
-        assert(i > TiedOp && "Tied operand preceeds its target!");
+        assert(i > TiedOp && "Tied operand precedes its target!");
         CaseOS << "    Inst.addOperand(Inst.getOperand(" << TiedOp << "));\n";
         Signature += "__Tie" + utostr(TiedOp);
         break;
diff --git a/utils/TableGen/AsmMatcherEmitter.h b/utils/TableGen/AsmMatcherEmitter.h
index 729c938..c13adf3 100644
--- a/utils/TableGen/AsmMatcherEmitter.h
+++ b/utils/TableGen/AsmMatcherEmitter.h
@@ -16,8 +16,6 @@
 #define ASMMATCHER_EMITTER_H
 
 #include "TableGenBackend.h"
-#include <map>
-#include <vector>
 #include <cassert>
 
 namespace llvm {
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index cd31e0c..2b1a4cc 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -542,12 +542,220 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
   << "}\n\n#endif\n";
 }
 
-void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
-  CodeGenTarget Target(Records);
-  Record *AsmWriter = Target.getAsmWriter();
+namespace {
 
-  O << "\n#ifdef PRINT_ALIAS_INSTR\n";
-  O << "#undef PRINT_ALIAS_INSTR\n\n";
+/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
+/// feature which participates in instruction matching.
+struct SubtargetFeatureInfo {
+  /// \brief The predicate record for this feature.
+  const Record *TheDef;
+
+  /// \brief An unique index assigned to represent this feature.
+  unsigned Index;
+
+  SubtargetFeatureInfo(const Record *D, unsigned Idx) : TheDef(D), Index(Idx) {}
+
+  /// \brief The name of the enumerated constant identifying this feature.
+  std::string getEnumName() const {
+    return "Feature_" + TheDef->getName();
+  }
+};
+
+struct AsmWriterInfo {
+  /// Map of Predicate records to their subtarget information.
+  std::map<const Record*, SubtargetFeatureInfo*> SubtargetFeatures;
+
+  /// getSubtargetFeature - Lookup or create the subtarget feature info for the
+  /// given operand.
+  SubtargetFeatureInfo *getSubtargetFeature(const Record *Def) const {
+    assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
+    std::map<const Record*, SubtargetFeatureInfo*>::const_iterator I =
+      SubtargetFeatures.find(Def);
+    return I == SubtargetFeatures.end() ? 0 : I->second;
+  }
+
+  void addReqFeatures(const std::vector<Record*> &Features) {
+    for (std::vector<Record*>::const_iterator
+           I = Features.begin(), E = Features.end(); I != E; ++I) {
+      const Record *Pred = *I;
+
+      // Ignore predicates that are not intended for the assembler.
+      if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
+        continue;
+
+      if (Pred->getName().empty())
+        throw TGError(Pred->getLoc(), "Predicate has no name!");
+
+      // Don't add the predicate again.
+      if (getSubtargetFeature(Pred))
+        continue;
+
+      unsigned FeatureNo = SubtargetFeatures.size();
+      SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo);
+      assert(FeatureNo < 32 && "Too many subtarget features!");
+    }
+  }
+
+  const SubtargetFeatureInfo *getFeatureInfo(const Record *R) {
+    return SubtargetFeatures[R];
+  }
+};
+
+// IAPrinter - Holds information about an InstAlias. Two InstAliases match if
+// they both have the same conditionals. In which case, we cannot print out the
+// alias for that pattern.
+class IAPrinter {
+  AsmWriterInfo &AWI;
+  std::vector<std::string> Conds;
+  std::map<StringRef, unsigned> OpMap;
+  std::string Result;
+  std::string AsmString;
+  std::vector<Record*> ReqFeatures;
+public:
+  IAPrinter(AsmWriterInfo &Info, std::string R, std::string AS)
+    : AWI(Info), Result(R), AsmString(AS) {}
+
+  void addCond(const std::string &C) { Conds.push_back(C); }
+  void addReqFeatures(const std::vector<Record*> &Features) {
+    AWI.addReqFeatures(Features);
+    ReqFeatures = Features;
+  }
+
+  void addOperand(StringRef Op, unsigned Idx) { OpMap[Op] = Idx; }
+  unsigned getOpIndex(StringRef Op) { return OpMap[Op]; }
+  bool isOpMapped(StringRef Op) { return OpMap.find(Op) != OpMap.end(); }
+
+  bool print(raw_ostream &O) {
+    if (Conds.empty() && ReqFeatures.empty()) {
+      O.indent(6) << "return true;\n";
+      return false;
+    }
+
+    O << "if (";
+
+    for (std::vector<std::string>::iterator
+           I = Conds.begin(), E = Conds.end(); I != E; ++I) {
+      if (I != Conds.begin()) {
+        O << " &&\n";
+        O.indent(8);
+      }
+
+      O << *I;
+    }
+
+    if (!ReqFeatures.empty()) {
+      if (Conds.begin() != Conds.end()) {
+        O << " &&\n";
+        O.indent(8);
+      } else {
+        O << "if (";
+      }
+
+      std::string Req;
+      raw_string_ostream ReqO(Req);
+
+      for (std::vector<Record*>::iterator
+             I = ReqFeatures.begin(), E = ReqFeatures.end(); I != E; ++I) {
+        if (I != ReqFeatures.begin()) ReqO << " | ";
+        ReqO << AWI.getFeatureInfo(*I)->getEnumName();
+      }
+
+      O << "(AvailableFeatures & (" << ReqO.str() << ")) == ("
+        << ReqO.str() << ')';
+    }
+
+    O << ") {\n";
+    O.indent(6) << "// " << Result << "\n";
+    O.indent(6) << "AsmString = \"" << AsmString << "\";\n";
+
+    for (std::map<StringRef, unsigned>::iterator
+           I = OpMap.begin(), E = OpMap.end(); I != E; ++I)
+      O.indent(6) << "OpMap[\"" << I->first << "\"] = "
+                  << I->second << ";\n";
+
+    O.indent(6) << "break;\n";
+    O.indent(4) << '}';
+    return !ReqFeatures.empty();
+  }
+
+  bool operator==(const IAPrinter &RHS) {
+    if (Conds.size() != RHS.Conds.size())
+      return false;
+
+    unsigned Idx = 0;
+    for (std::vector<std::string>::iterator
+           I = Conds.begin(), E = Conds.end(); I != E; ++I)
+      if (*I != RHS.Conds[Idx++])
+        return false;
+
+    return true;
+  }
+
+  bool operator()(const IAPrinter &RHS) {
+    if (Conds.size() < RHS.Conds.size())
+      return true;
+
+    unsigned Idx = 0;
+    for (std::vector<std::string>::iterator
+           I = Conds.begin(), E = Conds.end(); I != E; ++I)
+      if (*I != RHS.Conds[Idx++])
+        return *I < RHS.Conds[Idx++];
+
+    return false;
+  }
+};
+
+} // end anonymous namespace
+
+/// EmitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag
+/// definitions.
+static void EmitSubtargetFeatureFlagEnumeration(AsmWriterInfo &Info,
+                                                raw_ostream &O) {
+  O << "namespace {\n\n";
+  O << "// Flags for subtarget features that participate in "
+    << "alias instruction matching.\n";
+  O << "enum SubtargetFeatureFlag {\n";
+
+  for (std::map<const Record*, SubtargetFeatureInfo*>::const_iterator
+         I = Info.SubtargetFeatures.begin(),
+         E = Info.SubtargetFeatures.end(); I != E; ++I) {
+    SubtargetFeatureInfo &SFI = *I->second;
+    O << "  " << SFI.getEnumName() << " = (1 << " << SFI.Index << "),\n";
+  }
+
+  O << "  Feature_None = 0\n";
+  O << "};\n\n";
+  O << "} // end anonymous namespace\n\n";
+}
+
+/// EmitComputeAvailableFeatures - Emit the function to compute the list of
+/// available features given a subtarget.
+static void EmitComputeAvailableFeatures(AsmWriterInfo &Info,
+                                         Record *AsmWriter,
+                                         CodeGenTarget &Target,
+                                         raw_ostream &O) {
+  std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
+
+  O << "unsigned " << Target.getName() << ClassName << "::\n"
+    << "ComputeAvailableFeatures(const " << Target.getName()
+    << "Subtarget *Subtarget) const {\n";
+  O << "  unsigned Features = 0;\n";
+
+  for (std::map<const Record*, SubtargetFeatureInfo*>::const_iterator
+         I = Info.SubtargetFeatures.begin(),
+         E = Info.SubtargetFeatures.end(); I != E; ++I) {
+    SubtargetFeatureInfo &SFI = *I->second;
+    O << "  if (" << SFI.TheDef->getValueAsString("CondString")
+      << ")\n";
+    O << "    Features |= " << SFI.getEnumName() << ";\n";
+  }
+
+  O << "  return Features;\n";
+  O << "}\n\n";
+}
+
+void AsmWriterEmitter::EmitRegIsInRegClass(raw_ostream &O) {
+  CodeGenTarget Target(Records);
 
   // Enumerate the register classes.
   const std::vector<CodeGenRegisterClass> &RegisterClasses =
@@ -606,6 +814,16 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
   O << "  }\n\n";
   O << "  return false;\n";
   O << "}\n\n";
+}
+
+void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
+  CodeGenTarget Target(Records);
+  Record *AsmWriter = Target.getAsmWriter();
+
+  O << "\n#ifdef PRINT_ALIAS_INSTR\n";
+  O << "#undef PRINT_ALIAS_INSTR\n\n";
+
+  EmitRegIsInRegClass(O);
 
   // Emit the method that prints the alias instruction.
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
@@ -613,10 +831,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
   bool isMC = AsmWriter->getValueAsBit("isMCAsmWriter");
   const char *MachineInstrClassName = isMC ? "MCInst" : "MachineInstr";
 
-  O << "bool " << Target.getName() << ClassName
-    << "::printAliasInstr(const " << MachineInstrClassName
-    << " *MI, raw_ostream &OS) {\n";
-
   std::vector<Record*> AllInstAliases =
     Records.getAllDerivedDefinitions("InstAlias");
 
@@ -626,44 +840,35 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
          I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) {
     CodeGenInstAlias *Alias = new CodeGenInstAlias(*I, Target);
     const Record *R = *I;
+    if (!R->getValueAsBit("EmitAlias"))
+      continue; // We were told not to emit the alias, but to emit the aliasee.
     const DagInit *DI = R->getValueAsDag("ResultInst");
     const DefInit *Op = dynamic_cast<const DefInit*>(DI->getOperator());
     AliasMap[getQualifiedName(Op->getDef())].push_back(Alias);
   }
 
-  if (AliasMap.empty() || !isMC) {
-    // FIXME: Support MachineInstr InstAliases?
-    O << "  return true;\n";
-    O << "}\n\n";
-    O << "#endif // PRINT_ALIAS_INSTR\n";
-    return;
-  }
-
-  O << "  StringRef AsmString;\n";
-  O << "  std::map<StringRef, unsigned> OpMap;\n";
-  O << "  switch (MI->getOpcode()) {\n";
-  O << "  default: return true;\n";
+  // A map of which conditions need to be met for each instruction operand
+  // before it can be matched to the mnemonic.
+  std::map<std::string, std::vector<IAPrinter*> > IAPrinterMap;
+  AsmWriterInfo AWI;
 
   for (std::map<std::string, std::vector<CodeGenInstAlias*> >::iterator
          I = AliasMap.begin(), E = AliasMap.end(); I != E; ++I) {
     std::vector<CodeGenInstAlias*> &Aliases = I->second;
 
-    std::map<std::string, unsigned> CondCount;
-    std::map<std::string, std::string> BodyMap;
-
-    std::string AsmString = "";
-
     for (std::vector<CodeGenInstAlias*>::iterator
            II = Aliases.begin(), IE = Aliases.end(); II != IE; ++II) {
       const CodeGenInstAlias *CGA = *II;
-      AsmString = CGA->AsmString;
-      unsigned Indent = 8;
+      IAPrinter *IAP = new IAPrinter(AWI, CGA->Result->getAsString(),
+                                     CGA->AsmString);
+
+      IAP->addReqFeatures(CGA->TheDef->getValueAsListOfDefs("Predicates"));
+
       unsigned LastOpNo = CGA->ResultInstOperandIndex.size();
 
       std::string Cond;
-      raw_string_ostream CondO(Cond);
-
-      CondO << "if (MI->getNumOperands() == " << LastOpNo;
+      Cond = std::string("MI->getNumOperands() == ") + llvm::utostr(LastOpNo);
+      IAP->addCond(Cond);
 
       std::map<StringRef, unsigned> OpMap;
       bool CantHandle = false;
@@ -678,23 +883,26 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
           StringRef ROName = RO.getName();
 
           if (Rec->isSubClassOf("RegisterClass")) {
-            CondO << " &&\n";
-            CondO.indent(Indent) << "MI->getOperand(" << i << ").isReg() &&\n";
-            if (OpMap.find(ROName) == OpMap.end()) {
-              OpMap[ROName] = i;
-              CondO.indent(Indent)
-                << "regIsInRegisterClass(RC_"
-                << CGA->ResultOperands[i].getRecord()->getName()
-                << ", MI->getOperand(" << i << ").getReg())";
+            Cond = std::string("MI->getOperand(")+llvm::utostr(i)+").isReg()";
+            IAP->addCond(Cond);
+
+            if (!IAP->isOpMapped(ROName)) {
+              IAP->addOperand(ROName, i);
+              Cond = std::string("regIsInRegisterClass(RC_") +
+                CGA->ResultOperands[i].getRecord()->getName() +
+                ", MI->getOperand(" + llvm::utostr(i) + ").getReg())";
+              IAP->addCond(Cond);
             } else {
-              CondO.indent(Indent)
-                << "MI->getOperand(" << i
-                << ").getReg() == MI->getOperand("
-                << OpMap[ROName] << ").getReg()";
+              Cond = std::string("MI->getOperand(") +
+                llvm::utostr(i) + ").getReg() == MI->getOperand(" +
+                llvm::utostr(IAP->getOpIndex(ROName)) + ").getReg()";
+              IAP->addCond(Cond);
             }
           } else {
             assert(Rec->isSubClassOf("Operand") && "Unexpected operand!");
             // FIXME: We need to handle these situations.
+            delete IAP;
+            IAP = 0;
             CantHandle = true;
             break;
           }
@@ -702,67 +910,92 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
           break;
         }
         case CodeGenInstAlias::ResultOperand::K_Imm:
-          CondO << " &&\n";
-          CondO.indent(Indent) << "MI->getOperand(" << i << ").getImm() == ";
-          CondO << CGA->ResultOperands[i].getImm();
+          Cond = std::string("MI->getOperand(") +
+            llvm::utostr(i) + ").getImm() == " +
+            llvm::utostr(CGA->ResultOperands[i].getImm());
+          IAP->addCond(Cond);
           break;
         case CodeGenInstAlias::ResultOperand::K_Reg:
-          CondO << " &&\n";
-          CondO.indent(Indent) << "MI->getOperand(" << i << ").getReg() == ";
-          CondO << Target.getName() << "::"
-                << CGA->ResultOperands[i].getRegister()->getName();
+          Cond = std::string("MI->getOperand(") +
+            llvm::utostr(i) + ").getReg() == " + Target.getName() +
+            "::" + CGA->ResultOperands[i].getRegister()->getName();
+          IAP->addCond(Cond);
           break;
         }
 
-        if (CantHandle) break;
+        if (!IAP) break;
       }
 
       if (CantHandle) continue;
+      IAPrinterMap[I->first].push_back(IAP);
+    }
+  }
 
-      CondO << ")";
-
-      std::string Body;
-      raw_string_ostream BodyO(Body);
+  EmitSubtargetFeatureFlagEnumeration(AWI, O);
+  EmitComputeAvailableFeatures(AWI, AsmWriter, Target, O);
 
-      BodyO << "      // " << CGA->Result->getAsString() << "\n";
-      BodyO << "      AsmString = \"" << AsmString << "\";\n";
+  O << "bool " << Target.getName() << ClassName
+    << "::printAliasInstr(const " << MachineInstrClassName
+    << " *MI, raw_ostream &OS) {\n";
 
-      for (std::map<StringRef, unsigned>::iterator
-             III = OpMap.begin(), IIE = OpMap.end(); III != IIE; ++III)
-        BodyO << "      OpMap[\"" << III->first << "\"] = "
-              << III->second << ";\n";
+  std::string Cases;
+  raw_string_ostream CasesO(Cases);
+  bool NeedAvailableFeatures = false;
+
+  for (std::map<std::string, std::vector<IAPrinter*> >::iterator
+         I = IAPrinterMap.begin(), E = IAPrinterMap.end(); I != E; ++I) {
+    std::vector<IAPrinter*> &IAPs = I->second;
+    std::vector<IAPrinter*> UniqueIAPs;
+
+    for (std::vector<IAPrinter*>::iterator
+           II = IAPs.begin(), IE = IAPs.end(); II != IE; ++II) {
+      IAPrinter *LHS = *II;
+      bool IsDup = false;
+      for (std::vector<IAPrinter*>::iterator
+             III = IAPs.begin(), IIE = IAPs.end(); III != IIE; ++III) {
+        IAPrinter *RHS = *III;
+        if (LHS != RHS && *LHS == *RHS) {
+          IsDup = true;
+          break;
+        }
+      }
 
-      ++CondCount[CondO.str()];
-      BodyMap[CondO.str()] = BodyO.str();
+      if (!IsDup) UniqueIAPs.push_back(LHS);
     }
 
-    std::string Code;
-    raw_string_ostream CodeO(Code);
-
-    bool EmitElse = false;
-    for (std::map<std::string, unsigned>::iterator
-           II = CondCount.begin(), IE = CondCount.end(); II != IE; ++II) {
-      if (II->second != 1) continue;
-      CodeO << "    ";
-      if (EmitElse) CodeO << "} else ";
-      CodeO << II->first << " {\n";
-      CodeO << BodyMap[II->first];
-      EmitElse = true;
+    if (UniqueIAPs.empty()) continue;
+
+    CasesO.indent(2) << "case " << I->first << ":\n";
+
+    for (std::vector<IAPrinter*>::iterator
+           II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) {
+      IAPrinter *IAP = *II;
+      CasesO.indent(4);
+      NeedAvailableFeatures |= IAP->print(CasesO);
+      CasesO << '\n';
     }
 
-    if (CodeO.str().empty()) continue;
+    CasesO.indent(4) << "return false;\n";
+  }
 
-    O << "  case " << I->first << ":\n";
-    O << CodeO.str();
-    O << "    }\n";
-    O << "    break;\n";
+  if (CasesO.str().empty() || !isMC) {
+    O << "  return false;\n";
+    O << "}\n\n";
+    O << "#endif // PRINT_ALIAS_INSTR\n";
+    return;
   }
 
-  O << "  }\n\n";
+  O.indent(2) << "StringRef AsmString;\n";
+  O.indent(2) << "std::map<StringRef, unsigned> OpMap;\n";
+  if (NeedAvailableFeatures)
+    O.indent(2) << "unsigned AvailableFeatures = getAvailableFeatures();\n\n";
+  O.indent(2) << "switch (MI->getOpcode()) {\n";
+  O.indent(2) << "default: return false;\n";
+  O << CasesO.str();
+  O.indent(2) << "}\n\n";
 
   // Code that prints the alias, replacing the operands with the ones from the
   // MCInst.
-  O << "  if (AsmString.empty()) return true;\n";
   O << "  std::pair<StringRef, StringRef> ASM = AsmString.split(' ');\n";
   O << "  OS << '\\t' << ASM.first;\n";
 
@@ -786,7 +1019,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
   O << "    }\n";
   O << "  }\n\n";
   
-  O << "  return false;\n";
+  O << "  return true;\n";
   O << "}\n\n";
 
   O << "#endif // PRINT_ALIAS_INSTR\n";
diff --git a/utils/TableGen/AsmWriterEmitter.h b/utils/TableGen/AsmWriterEmitter.h
index 5e8d6f5..84c925b 100644
--- a/utils/TableGen/AsmWriterEmitter.h
+++ b/utils/TableGen/AsmWriterEmitter.h
@@ -38,6 +38,7 @@ private:
     void EmitPrintInstruction(raw_ostream &o);
     void EmitGetRegisterName(raw_ostream &o);
     void EmitGetInstructionName(raw_ostream &o);
+    void EmitRegIsInRegClass(raw_ostream &O);
     void EmitPrintAliasInstruction(raw_ostream &O);
     
     AsmWriterInst *getAsmWriterInstByID(unsigned ID) const {
diff --git a/utils/TableGen/CallingConvEmitter.h b/utils/TableGen/CallingConvEmitter.h
index 7fc2507..431c33b 100644
--- a/utils/TableGen/CallingConvEmitter.h
+++ b/utils/TableGen/CallingConvEmitter.h
@@ -16,8 +16,6 @@
 #define CALLINGCONV_EMITTER_H
 
 #include "TableGenBackend.h"
-#include <map>
-#include <vector>
 #include <cassert>
 
 namespace llvm {
diff --git a/utils/TableGen/ClangASTNodesEmitter.cpp b/utils/TableGen/ClangASTNodesEmitter.cpp
index 187ab46..d9d5a3c 100644
--- a/utils/TableGen/ClangASTNodesEmitter.cpp
+++ b/utils/TableGen/ClangASTNodesEmitter.cpp
@@ -155,10 +155,13 @@ void ClangDeclContextEmitter::run(raw_ostream &OS) {
     }
   }
 
-  for (RecordSet::iterator i = DeclContexts.begin(), e = DeclContexts.end();
-       i != e; ++i) {
-    OS << "DECL_CONTEXT(" << (*i)->getName() << ")\n";
-  }
+  // To keep identical order, RecordVector may be used
+  // instead of RecordSet.
+  for (RecordVector::iterator
+         i = DeclContextsVector.begin(), e = DeclContextsVector.end();
+       i != e; ++i)
+    if (DeclContexts.find(*i) != DeclContexts.end())
+      OS << "DECL_CONTEXT(" << (*i)->getName() << ")\n";
 
   OS << "#undef DECL_CONTEXT\n";
   OS << "#undef DECL_CONTEXT_BASE\n";
diff --git a/utils/TableGen/ClangAttrEmitter.cpp b/utils/TableGen/ClangAttrEmitter.cpp
index 27e1e02..26bd878 100644
--- a/utils/TableGen/ClangAttrEmitter.cpp
+++ b/utils/TableGen/ClangAttrEmitter.cpp
@@ -46,6 +46,7 @@ std::string ReadPCHRecord(StringRef type) {
               ">(GetDecl(Record[Idx++]))")
     .Case("QualType", "GetType(Record[Idx++])")
     .Case("Expr *", "ReadSubExpr()")
+    .Case("IdentifierInfo *", "GetIdentifierInfo(Record, Idx)")
     .Default("Record[Idx++]");
 }
 
@@ -56,6 +57,8 @@ std::string WritePCHRecord(StringRef type, StringRef name) {
                         ", Record);\n")
     .Case("QualType", "AddTypeRef(" + std::string(name) + ", Record);\n")
     .Case("Expr *", "AddStmt(" + std::string(name) + ");\n")
+    .Case("IdentifierInfo *", 
+          "AddIdentifierRef(" + std::string(name) + ", Record);\n")
     .Default("Record.push_back(" + std::string(name) + ");\n");
 }
 
@@ -415,6 +418,47 @@ namespace {
       OS << "Record.push_back(SA->get" << getUpperName() << "());\n";
     }
   };
+
+  class VersionArgument : public Argument {
+  public:
+    VersionArgument(Record &Arg, StringRef Attr)
+      : Argument(Arg, Attr)
+    {}
+
+    void writeAccessors(raw_ostream &OS) const {
+      OS << "  VersionTuple get" << getUpperName() << "() const {\n";
+      OS << "    return " << getLowerName() << ";\n";
+      OS << "  }\n";
+      OS << "  void set" << getUpperName() 
+         << "(ASTContext &C, VersionTuple V) {\n";
+      OS << "    " << getLowerName() << " = V;\n";
+      OS << "  }";
+    }
+    void writeCloneArgs(raw_ostream &OS) const {
+      OS << "get" << getUpperName() << "()";
+    }
+    void writeCtorBody(raw_ostream &OS) const {
+    }
+    void writeCtorInitializers(raw_ostream &OS) const {
+      OS << getLowerName() << "(" << getUpperName() << ")";
+    }
+    void writeCtorParameters(raw_ostream &OS) const {
+      OS << "VersionTuple " << getUpperName();
+    }
+    void writeDeclarations(raw_ostream &OS) const {
+      OS << "VersionTuple " << getLowerName() << ";\n";
+    }
+    void writePCHReadDecls(raw_ostream &OS) const {
+      OS << "    VersionTuple " << getLowerName()
+         << "= ReadVersionTuple(Record, Idx);\n";
+    }
+    void writePCHReadArgs(raw_ostream &OS) const {
+      OS << getLowerName();
+    }
+    void writePCHWrite(raw_ostream &OS) const {
+      OS << "    AddVersionTuple(SA->get" << getUpperName() << "(), Record);\n";
+    }
+  };
 }
 
 static Argument *createArgument(Record &Arg, StringRef Attr,
@@ -433,6 +477,8 @@ static Argument *createArgument(Record &Arg, StringRef Attr,
     Ptr = new SimpleArgument(Arg, Attr, "FunctionDecl *");
   else if (ArgName == "IdentifierArgument")
     Ptr = new SimpleArgument(Arg, Attr, "IdentifierInfo *");
+  else if (ArgName == "BoolArgument") Ptr = new SimpleArgument(Arg, Attr, 
+                                                               "bool");
   else if (ArgName == "IntArgument") Ptr = new SimpleArgument(Arg, Attr, "int");
   else if (ArgName == "StringArgument") Ptr = new StringArgument(Arg, Attr);
   else if (ArgName == "TypeArgument")
@@ -441,6 +487,8 @@ static Argument *createArgument(Record &Arg, StringRef Attr,
     Ptr = new SimpleArgument(Arg, Attr, "unsigned");
   else if (ArgName == "VariadicUnsignedArgument")
     Ptr = new VariadicArgument(Arg, Attr, "unsigned");
+  else if (ArgName == "VersionArgument")
+    Ptr = new VersionArgument(Arg, Attr);
 
   if (!Ptr) {
     std::vector<Record*> Bases = Search->getSuperClasses();
@@ -589,23 +637,37 @@ void ClangAttrListEmitter::run(raw_ostream &OS) {
   OS << "#define LAST_INHERITABLE_ATTR(NAME) INHERITABLE_ATTR(NAME)\n";
   OS << "#endif\n\n";
 
+  OS << "#ifndef INHERITABLE_PARAM_ATTR\n";
+  OS << "#define INHERITABLE_PARAM_ATTR(NAME) ATTR(NAME)\n";
+  OS << "#endif\n\n";
+
+  OS << "#ifndef LAST_INHERITABLE_PARAM_ATTR\n";
+  OS << "#define LAST_INHERITABLE_PARAM_ATTR(NAME)"
+        " INHERITABLE_PARAM_ATTR(NAME)\n";
+  OS << "#endif\n\n";
+
   Record *InhClass = Records.getClass("InheritableAttr");
+  Record *InhParamClass = Records.getClass("InheritableParamAttr");
   std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"),
-                       NonInhAttrs, InhAttrs;
+                       NonInhAttrs, InhAttrs, InhParamAttrs;
   for (std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end();
        i != e; ++i) {
-    if ((*i)->isSubClassOf(InhClass))
+    if ((*i)->isSubClassOf(InhParamClass))
+      InhParamAttrs.push_back(*i);
+    else if ((*i)->isSubClassOf(InhClass))
       InhAttrs.push_back(*i);
     else
       NonInhAttrs.push_back(*i);
   }
 
+  EmitAttrList(OS, "INHERITABLE_PARAM_ATTR", InhParamAttrs);
   EmitAttrList(OS, "INHERITABLE_ATTR", InhAttrs);
   EmitAttrList(OS, "ATTR", NonInhAttrs);
 
   OS << "#undef LAST_ATTR\n";
   OS << "#undef INHERITABLE_ATTR\n";
   OS << "#undef LAST_INHERITABLE_ATTR\n";
+  OS << "#undef LAST_INHERITABLE_PARAM_ATTR\n";
   OS << "#undef ATTR\n";
 }
 
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 60e67c4..0f4b606 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -19,8 +19,9 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/VectorExtras.h"
-#include <set>
 #include <map>
+#include <algorithm>
+#include <functional>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -121,7 +122,6 @@ namespace {
 } // end anonymous namespace.
 
 
-
 //===----------------------------------------------------------------------===//
 // Warning Tables (.inc file) generation.
 //===----------------------------------------------------------------------===//
@@ -179,6 +179,14 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
 
     // Category number.
     OS << ", " << CategoryIDs.getID(getDiagnosticCategory(&R, DGParentMap));
+
+    // Brief
+    OS << ", \"";
+    OS.write_escaped(R.getValueAsString("Brief")) << '"';
+
+    // Explanation 
+    OS << ", \"";
+    OS.write_escaped(R.getValueAsString("Explanation")) << '"';
     OS << ")\n";
   }
 }
@@ -294,3 +302,49 @@ void ClangDiagGroupsEmitter::run(raw_ostream &OS) {
     OS << "CATEGORY(\"" << *I << "\")\n";
   OS << "#endif // GET_CATEGORY_TABLE\n\n";
 }
+
+//===----------------------------------------------------------------------===//
+// Diagnostic name index generation
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct RecordIndexElement
+{
+  RecordIndexElement() {}
+  explicit RecordIndexElement(Record const &R):
+    Name(R.getName()) {}
+  
+  std::string Name;
+};
+
+struct RecordIndexElementSorter :
+  public std::binary_function<RecordIndexElement, RecordIndexElement, bool> {
+  
+  bool operator()(RecordIndexElement const &Lhs,
+                  RecordIndexElement const &Rhs) const {
+    return Lhs.Name < Rhs.Name;
+  }
+  
+};
+
+} // end anonymous namespace.
+
+void ClangDiagsIndexNameEmitter::run(raw_ostream &OS) {
+  const std::vector<Record*> &Diags =
+    Records.getAllDerivedDefinitions("Diagnostic");
+  
+  std::vector<RecordIndexElement> Index;
+  Index.reserve(Diags.size());
+  for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
+    const Record &R = *(Diags[i]);    
+    Index.push_back(RecordIndexElement(R));
+  }
+  
+  std::sort(Index.begin(), Index.end(), RecordIndexElementSorter());
+  
+  for (unsigned i = 0, e = Index.size(); i != e; ++i) {
+    const RecordIndexElement &R = Index[i];
+    
+    OS << "DIAG_NAME_INDEX(" << R.Name << ")\n";
+  }
+}
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.h b/utils/TableGen/ClangDiagnosticsEmitter.h
index edd062a..1e4c8b7 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.h
+++ b/utils/TableGen/ClangDiagnosticsEmitter.h
@@ -33,13 +33,21 @@ public:
 };
 
 class ClangDiagGroupsEmitter : public TableGenBackend {
-    RecordKeeper &Records;
+  RecordKeeper &Records;
 public:
   explicit ClangDiagGroupsEmitter(RecordKeeper &R) : Records(R) {}
     
   void run(raw_ostream &OS);
 };
 
+class ClangDiagsIndexNameEmitter : public TableGenBackend {
+  RecordKeeper &Records;
+public:
+  explicit ClangDiagsIndexNameEmitter(RecordKeeper &R) : Records(R) {}
+  
+  void run(raw_ostream &OS);
+};
+
   
 } // End llvm namespace
 
diff --git a/utils/TableGen/ClangSACheckersEmitter.cpp b/utils/TableGen/ClangSACheckersEmitter.cpp
index 8865db3..97739c6 100644
--- a/utils/TableGen/ClangSACheckersEmitter.cpp
+++ b/utils/TableGen/ClangSACheckersEmitter.cpp
@@ -71,7 +71,7 @@ static std::string getStringValue(const Record &R, StringRef field) {
 
 namespace {
 struct GroupInfo {
-  std::vector<const Record*> Checkers;
+  llvm::DenseSet<const Record*> Checkers;
   llvm::DenseSet<const Record *> SubGroups;
   bool Hidden;
   unsigned Index;
@@ -80,33 +80,24 @@ struct GroupInfo {
 };
 }
 
+static void addPackageToCheckerGroup(const Record *package, const Record *group,
+                  llvm::DenseMap<const Record *, GroupInfo *> &recordGroupMap) {
+  llvm::DenseSet<const Record *> &checkers = recordGroupMap[package]->Checkers;
+  for (llvm::DenseSet<const Record *>::iterator
+         I = checkers.begin(), E = checkers.end(); I != E; ++I)
+    recordGroupMap[group]->Checkers.insert(*I);
+
+  llvm::DenseSet<const Record *> &subGroups = recordGroupMap[package]->SubGroups;
+  for (llvm::DenseSet<const Record *>::iterator
+         I = subGroups.begin(), E = subGroups.end(); I != E; ++I)
+    addPackageToCheckerGroup(*I, group, recordGroupMap);
+}
+
 void ClangSACheckersEmitter::run(raw_ostream &OS) {
   std::vector<Record*> checkers = Records.getAllDerivedDefinitions("Checker");
   llvm::DenseMap<const Record *, unsigned> checkerRecIndexMap;
   for (unsigned i = 0, e = checkers.size(); i != e; ++i)
     checkerRecIndexMap[checkers[i]] = i;
-  
-  OS << "\n#ifdef GET_CHECKERS\n";
-  for (unsigned i = 0, e = checkers.size(); i != e; ++i) {
-    const Record &R = *checkers[i];
-
-    OS << "CHECKER(" << "\"";
-    std::string name;
-    if (isCheckerNamed(&R))
-      name = getCheckerFullName(&R);
-    OS.write_escaped(name) << "\", ";
-    OS << R.getName() << ", ";
-    OS << getStringValue(R, "DescFile") << ", ";
-    OS << "\"";
-    OS.write_escaped(getStringValue(R, "HelpText")) << "\", ";
-    // Hidden bit
-    if (isHidden(R))
-      OS << "true";
-    else
-      OS << "false";
-    OS << ")\n";
-  }
-  OS << "#endif // GET_CHECKERS\n\n";
 
   // Invert the mapping of checkers to package/group into a one to many
   // mapping of packages/groups to checkers.
@@ -150,9 +141,9 @@ void ClangSACheckersEmitter::run(raw_ostream &OS) {
       GroupInfo &info = groupInfoByName[fullName];
       info.Hidden = R->getValueAsBit("Hidden");
       recordGroupMap[R] = &info;
-      info.Checkers.push_back(R);
+      info.Checkers.insert(R);
     } else {
-      recordGroupMap[package]->Checkers.push_back(R);
+      recordGroupMap[package]->Checkers.insert(R);
     }
 
     Record *currR = isCheckerNamed(R) ? R : package;
@@ -166,8 +157,93 @@ void ClangSACheckersEmitter::run(raw_ostream &OS) {
     }
     // Insert the checker into the set of its group.
     if (DefInit *DI = dynamic_cast<DefInit*>(R->getValueInit("Group")))
-      recordGroupMap[DI->getDef()]->Checkers.push_back(R);
+      recordGroupMap[DI->getDef()]->Checkers.insert(R);
+  }
+
+  // If a package is in group, add all its checkers and its sub-packages
+  // checkers into the group.
+  for (unsigned i = 0, e = packages.size(); i != e; ++i)
+    if (DefInit *DI = dynamic_cast<DefInit*>(packages[i]->getValueInit("Group")))
+      addPackageToCheckerGroup(packages[i], DI->getDef(), recordGroupMap);
+
+  typedef std::map<std::string, const Record *> SortedRecords;
+  typedef llvm::DenseMap<const Record *, unsigned> RecToSortIndex;
+
+  SortedRecords sortedGroups;
+  RecToSortIndex groupToSortIndex;
+  OS << "\n#ifdef GET_GROUPS\n";
+  {
+    for (unsigned i = 0, e = checkerGroups.size(); i != e; ++i)
+      sortedGroups[checkerGroups[i]->getValueAsString("GroupName")]
+                   = checkerGroups[i];
+
+    unsigned sortIndex = 0;
+    for (SortedRecords::iterator
+           I = sortedGroups.begin(), E = sortedGroups.end(); I != E; ++I) {
+      const Record *R = I->second;
+  
+      OS << "GROUP(" << "\"";
+      OS.write_escaped(R->getValueAsString("GroupName")) << "\"";
+      OS << ")\n";
+
+      groupToSortIndex[R] = sortIndex++;
+    }
   }
+  OS << "#endif // GET_GROUPS\n\n";
+
+  OS << "\n#ifdef GET_PACKAGES\n";
+  {
+    SortedRecords sortedPackages;
+    for (unsigned i = 0, e = packages.size(); i != e; ++i)
+      sortedPackages[getPackageFullName(packages[i])] = packages[i];
+  
+    for (SortedRecords::iterator
+           I = sortedPackages.begin(), E = sortedPackages.end(); I != E; ++I) {
+      const Record &R = *I->second;
+  
+      OS << "PACKAGE(" << "\"";
+      OS.write_escaped(getPackageFullName(&R)) << "\", ";
+      // Group index
+      if (DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("Group")))
+        OS << groupToSortIndex[DI->getDef()] << ", ";
+      else
+        OS << "-1, ";
+      // Hidden bit
+      if (isHidden(R))
+        OS << "true";
+      else
+        OS << "false";
+      OS << ")\n";
+    }
+  }
+  OS << "#endif // GET_PACKAGES\n\n";
+  
+  OS << "\n#ifdef GET_CHECKERS\n";
+  for (unsigned i = 0, e = checkers.size(); i != e; ++i) {
+    const Record &R = *checkers[i];
+
+    OS << "CHECKER(" << "\"";
+    std::string name;
+    if (isCheckerNamed(&R))
+      name = getCheckerFullName(&R);
+    OS.write_escaped(name) << "\", ";
+    OS << R.getName() << ", ";
+    OS << getStringValue(R, "DescFile") << ", ";
+    OS << "\"";
+    OS.write_escaped(getStringValue(R, "HelpText")) << "\", ";
+    // Group index
+    if (DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("Group")))
+      OS << groupToSortIndex[DI->getDef()] << ", ";
+    else
+      OS << "-1, ";
+    // Hidden bit
+    if (isHidden(R))
+      OS << "true";
+    else
+      OS << "false";
+    OS << ")\n";
+  }
+  OS << "#endif // GET_CHECKERS\n\n";
 
   unsigned index = 0;
   for (std::map<std::string, GroupInfo>::iterator
@@ -182,21 +258,34 @@ void ClangSACheckersEmitter::run(raw_ostream &OS) {
   for (std::map<std::string, GroupInfo>::iterator
          I = groupInfoByName.begin(), E = groupInfoByName.end(); I != E; ++I) {
     maxLen = std::max(maxLen, (unsigned)I->first.size());
-    
-    std::vector<const Record*> &V = I->second.Checkers;
-    if (!V.empty()) {
+
+    llvm::DenseSet<const Record *> &checkers = I->second.Checkers;
+    if (!checkers.empty()) {
       OS << "static const short CheckerArray" << I->second.Index << "[] = { ";
-      for (unsigned i = 0, e = V.size(); i != e; ++i)
-        OS << checkerRecIndexMap[V[i]] << ", ";
+      // Make the output order deterministic.
+      std::map<int, const Record *> sorted;
+      for (llvm::DenseSet<const Record *>::iterator
+             I = checkers.begin(), E = checkers.end(); I != E; ++I)
+        sorted[(*I)->getID()] = *I;
+
+      for (std::map<int, const Record *>::iterator
+             I = sorted.begin(), E = sorted.end(); I != E; ++I)
+        OS << checkerRecIndexMap[I->second] << ", ";
       OS << "-1 };\n";
     }
     
     llvm::DenseSet<const Record *> &subGroups = I->second.SubGroups;
     if (!subGroups.empty()) {
       OS << "static const short SubPackageArray" << I->second.Index << "[] = { ";
+      // Make the output order deterministic.
+      std::map<int, const Record *> sorted;
       for (llvm::DenseSet<const Record *>::iterator
-             I = subGroups.begin(), E = subGroups.end(); I != E; ++I) {
-        OS << recordGroupMap[*I]->Index << ", ";
+             I = subGroups.begin(), E = subGroups.end(); I != E; ++I)
+        sorted[(*I)->getID()] = *I;
+
+      for (std::map<int, const Record *>::iterator
+             I = sorted.begin(), E = sorted.end(); I != E; ++I) {
+        OS << recordGroupMap[I->second]->Index << ", ";
       }
       OS << "-1 };\n";
     }
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 957dd19..9d4dc5c4 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -63,10 +63,14 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
 // return the variable bit position.  Otherwise return -1.
 int CodeEmitterGen::getVariableBit(const std::string &VarName,
                                    BitsInit *BI, int bit) {
-  if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit)))
+  if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit))) {
     if (VarInit *VI = dynamic_cast<VarInit*>(VBI->getVariable()))
       if (VI->getName() == VarName)
         return VBI->getBitNum();
+  } else if (VarInit *VI = dynamic_cast<VarInit*>(BI->getBit(bit))) {
+    if (VI->getName() == VarName)
+      return 0;
+  }
 
   return -1;
 }
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index aa60f87..a08cde6 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -580,34 +580,29 @@ typedef std::map<std::string, int> DepVarMap;
 /// Const iterator shorthand for DepVarMap
 typedef DepVarMap::const_iterator DepVarMap_citer;
 
-namespace {
-void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) {
+static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) {
   if (N->isLeaf()) {
-    if (dynamic_cast<DefInit*>(N->getLeafValue()) != NULL) {
+    if (dynamic_cast<DefInit*>(N->getLeafValue()) != NULL)
       DepMap[N->getName()]++;
-    }
   } else {
     for (size_t i = 0, e = N->getNumChildren(); i != e; ++i)
       FindDepVarsOf(N->getChild(i), DepMap);
   }
 }
-
-//! Find dependent variables within child patterns
-/*!
- */
-void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
+  
+/// Find dependent variables within child patterns
+static void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
   DepVarMap depcounts;
   FindDepVarsOf(N, depcounts);
   for (DepVarMap_citer i = depcounts.begin(); i != depcounts.end(); ++i) {
-    if (i->second > 1) {            // std::pair<std::string, int>
+    if (i->second > 1)            // std::pair<std::string, int>
       DepVars.insert(i->first);
-    }
   }
 }
 
-//! Dump the dependent variable set:
 #ifndef NDEBUG
-void DumpDepVars(MultipleUseVarSet &DepVars) {
+/// Dump the dependent variable set:
+static void DumpDepVars(MultipleUseVarSet &DepVars) {
   if (DepVars.empty()) {
     DEBUG(errs() << "<empty set>");
   } else {
@@ -621,6 +616,66 @@ void DumpDepVars(MultipleUseVarSet &DepVars) {
 }
 #endif
 
+
+//===----------------------------------------------------------------------===//
+// TreePredicateFn Implementation
+//===----------------------------------------------------------------------===//
+
+/// TreePredicateFn constructor.  Here 'N' is a subclass of PatFrag.
+TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) {
+  assert((getPredCode().empty() || getImmCode().empty()) &&
+        ".td file corrupt: can't have a node predicate *and* an imm predicate");
+}
+
+std::string TreePredicateFn::getPredCode() const {
+  return PatFragRec->getRecord()->getValueAsCode("PredicateCode");
+}
+
+std::string TreePredicateFn::getImmCode() const {
+  return PatFragRec->getRecord()->getValueAsCode("ImmediateCode");
+}
+
+
+/// isAlwaysTrue - Return true if this is a noop predicate.
+bool TreePredicateFn::isAlwaysTrue() const {
+  return getPredCode().empty() && getImmCode().empty();
+}
+
+/// Return the name to use in the generated code to reference this, this is
+/// "Predicate_foo" if from a pattern fragment "foo".
+std::string TreePredicateFn::getFnName() const {
+  return "Predicate_" + PatFragRec->getRecord()->getName();
+}
+
+/// getCodeToRunOnSDNode - Return the code for the function body that
+/// evaluates this predicate.  The argument is expected to be in "Node",
+/// not N.  This handles casting and conversion to a concrete node type as
+/// appropriate.
+std::string TreePredicateFn::getCodeToRunOnSDNode() const {
+  // Handle immediate predicates first.
+  std::string ImmCode = getImmCode();
+  if (!ImmCode.empty()) {
+    std::string Result =
+      "    int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue();\n";
+    return Result + ImmCode;
+  }
+  
+  // Handle arbitrary node predicates.
+  assert(!getPredCode().empty() && "Don't have any predicate code!");
+  std::string ClassName;
+  if (PatFragRec->getOnlyTree()->isLeaf())
+    ClassName = "SDNode";
+  else {
+    Record *Op = PatFragRec->getOnlyTree()->getOperator();
+    ClassName = PatFragRec->getDAGPatterns().getSDNodeInfo(Op).getSDClassName();
+  }
+  std::string Result;
+  if (ClassName == "SDNode")
+    Result = "    SDNode *N = Node;\n";
+  else
+    Result = "    " + ClassName + "*N = cast<" + ClassName + ">(Node);\n";
+  
+  return Result + getPredCode();
 }
 
 //===----------------------------------------------------------------------===//
@@ -1015,7 +1070,7 @@ void TreePatternNode::print(raw_ostream &OS) const {
   }
 
   for (unsigned i = 0, e = PredicateFns.size(); i != e; ++i)
-    OS << "<<P:" << PredicateFns[i] << ">>";
+    OS << "<<P:" << PredicateFns[i].getFnName() << ">>";
   if (TransformFn)
     OS << "<<X:" << TransformFn->getName() << ">>";
   if (!getName().empty())
@@ -1150,9 +1205,9 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
 
   TreePatternNode *FragTree = Frag->getOnlyTree()->clone();
 
-  std::string Code = Op->getValueAsCode("Predicate");
-  if (!Code.empty())
-    FragTree->addPredicateFn("Predicate_"+Op->getName());
+  TreePredicateFn PredFn(Frag);
+  if (!PredFn.isAlwaysTrue())
+    FragTree->addPredicateFn(PredFn);
 
   // Resolve formal arguments to their actual value.
   if (Frag->getNumArgs()) {
@@ -2063,9 +2118,9 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
 
     // If there is a code init for this fragment, keep track of the fact that
     // this fragment uses it.
-    std::string Code = Fragments[i]->getValueAsCode("Predicate");
-    if (!Code.empty())
-      P->getOnlyTree()->addPredicateFn("Predicate_"+Fragments[i]->getName());
+    TreePredicateFn PredFn(P);
+    if (!PredFn.isAlwaysTrue())
+      P->getOnlyTree()->addPredicateFn(PredFn);
 
     // If there is a node transformation corresponding to this, keep track of
     // it.
@@ -2288,13 +2343,14 @@ class InstAnalyzer {
   const CodeGenDAGPatterns &CDP;
   bool &mayStore;
   bool &mayLoad;
+  bool &IsBitcast;
   bool &HasSideEffects;
   bool &IsVariadic;
 public:
   InstAnalyzer(const CodeGenDAGPatterns &cdp,
-               bool &maystore, bool &mayload, bool &hse, bool &isv)
-    : CDP(cdp), mayStore(maystore), mayLoad(mayload), HasSideEffects(hse),
-      IsVariadic(isv) {
+               bool &maystore, bool &mayload, bool &isbc, bool &hse, bool &isv)
+    : CDP(cdp), mayStore(maystore), mayLoad(mayload), IsBitcast(isbc),
+      HasSideEffects(hse), IsVariadic(isv) {
   }
 
   /// Analyze - Analyze the specified instruction, returning true if the
@@ -2313,6 +2369,29 @@ public:
   }
 
 private:
+  bool IsNodeBitcast(const TreePatternNode *N) const {
+    if (HasSideEffects || mayLoad || mayStore || IsVariadic)
+      return false;
+
+    if (N->getNumChildren() != 2)
+      return false;
+
+    const TreePatternNode *N0 = N->getChild(0);
+    if (!N0->isLeaf() || !dynamic_cast<DefInit*>(N0->getLeafValue()))
+      return false;
+
+    const TreePatternNode *N1 = N->getChild(1);
+    if (N1->isLeaf())
+      return false;
+    if (N1->getNumChildren() != 1 || !N1->getChild(0)->isLeaf())
+      return false;
+
+    const SDNodeInfo &OpInfo = CDP.getSDNodeInfo(N1->getOperator());
+    if (OpInfo.getNumResults() != 1 || OpInfo.getNumOperands() != 1)
+      return false;
+    return OpInfo.getEnumName() == "ISD::BITCAST";
+  }
+
   void AnalyzeNode(const TreePatternNode *N) {
     if (N->isLeaf()) {
       if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
@@ -2333,8 +2412,10 @@ private:
       AnalyzeNode(N->getChild(i));
 
     // Ignore set nodes, which are not SDNodes.
-    if (N->getOperator()->getName() == "set")
+    if (N->getOperator()->getName() == "set") {
+      IsBitcast = IsNodeBitcast(N);
       return;
+    }
 
     // Get information about the SDNode for the operator.
     const SDNodeInfo &OpInfo = CDP.getSDNodeInfo(N->getOperator());
@@ -2363,12 +2444,13 @@ private:
 
 static void InferFromPattern(const CodeGenInstruction &Inst,
                              bool &MayStore, bool &MayLoad,
+                             bool &IsBitcast,
                              bool &HasSideEffects, bool &IsVariadic,
                              const CodeGenDAGPatterns &CDP) {
-  MayStore = MayLoad = HasSideEffects = IsVariadic = false;
+  MayStore = MayLoad = IsBitcast = HasSideEffects = IsVariadic = false;
 
   bool HadPattern =
-    InstAnalyzer(CDP, MayStore, MayLoad, HasSideEffects, IsVariadic)
+    InstAnalyzer(CDP, MayStore, MayLoad, IsBitcast, HasSideEffects, IsVariadic)
     .Analyze(Inst.TheDef);
 
   // InstAnalyzer only correctly analyzes mayStore/mayLoad so far.
@@ -2714,11 +2796,12 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
     CodeGenInstruction &InstInfo =
       const_cast<CodeGenInstruction &>(*Instructions[i]);
     // Determine properties of the instruction from its pattern.
-    bool MayStore, MayLoad, HasSideEffects, IsVariadic;
-    InferFromPattern(InstInfo, MayStore, MayLoad, HasSideEffects, IsVariadic,
-                     *this);
+    bool MayStore, MayLoad, IsBitcast, HasSideEffects, IsVariadic;
+    InferFromPattern(InstInfo, MayStore, MayLoad, IsBitcast,
+                     HasSideEffects, IsVariadic, *this);
     InstInfo.mayStore = MayStore;
     InstInfo.mayLoad = MayLoad;
+    InstInfo.isBitcast = IsBitcast;
     InstInfo.hasSideEffects = HasSideEffects;
     InstInfo.Operands.isVariadic = IsVariadic;
   }
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 946dcee..e4e8574 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -239,6 +239,57 @@ public:
     return MadeChange;
   }
 };
+  
+/// TreePredicateFn - This is an abstraction that represents the predicates on
+/// a PatFrag node.  This is a simple one-word wrapper around a pointer to
+/// provide nice accessors.
+class TreePredicateFn {
+  /// PatFragRec - This is the TreePattern for the PatFrag that we
+  /// originally came from.
+  TreePattern *PatFragRec;
+public:
+  /// TreePredicateFn constructor.  Here 'N' is a subclass of PatFrag.
+  TreePredicateFn(TreePattern *N);
+
+  
+  TreePattern *getOrigPatFragRecord() const { return PatFragRec; }
+  
+  /// isAlwaysTrue - Return true if this is a noop predicate.
+  bool isAlwaysTrue() const;
+  
+  bool isImmediatePattern() const { return !getImmCode().empty(); }
+  
+  /// getImmediatePredicateCode - Return the code that evaluates this pattern if
+  /// this is an immediate predicate.  It is an error to call this on a
+  /// non-immediate pattern.
+  std::string getImmediatePredicateCode() const {
+    std::string Result = getImmCode();
+    assert(!Result.empty() && "Isn't an immediate pattern!");
+    return Result;
+  }
+  
+  
+  bool operator==(const TreePredicateFn &RHS) const {
+    return PatFragRec == RHS.PatFragRec;
+  }
+
+  bool operator!=(const TreePredicateFn &RHS) const { return !(*this == RHS); }
+
+  /// Return the name to use in the generated code to reference this, this is
+  /// "Predicate_foo" if from a pattern fragment "foo".
+  std::string getFnName() const;
+  
+  /// getCodeToRunOnSDNode - Return the code for the function body that
+  /// evaluates this predicate.  The argument is expected to be in "Node",
+  /// not N.  This handles casting and conversion to a concrete node type as
+  /// appropriate.
+  std::string getCodeToRunOnSDNode() const;
+  
+private:
+  std::string getPredCode() const;
+  std::string getImmCode() const;
+};
+  
 
 /// FIXME: TreePatternNode's can be shared in some cases (due to dag-shaped
 /// patterns), and as such should be ref counted.  We currently just leak all
@@ -263,7 +314,7 @@ class TreePatternNode {
 
   /// PredicateFns - The predicate functions to execute on this node to check
   /// for a match.  If this list is empty, no predicate is involved.
-  std::vector<std::string> PredicateFns;
+  std::vector<TreePredicateFn> PredicateFns;
 
   /// TransformFn - The transformation function to execute on this node before
   /// it can be substituted into the resulting instruction on a pattern match.
@@ -323,14 +374,18 @@ public:
     return false;
   }
 
-  const std::vector<std::string> &getPredicateFns() const {return PredicateFns;}
+  bool hasAnyPredicate() const { return !PredicateFns.empty(); }
+  
+  const std::vector<TreePredicateFn> &getPredicateFns() const {
+    return PredicateFns;
+  }
   void clearPredicateFns() { PredicateFns.clear(); }
-  void setPredicateFns(const std::vector<std::string> &Fns) {
+  void setPredicateFns(const std::vector<TreePredicateFn> &Fns) {
     assert(PredicateFns.empty() && "Overwriting non-empty predicate list!");
     PredicateFns = Fns;
   }
-  void addPredicateFn(const std::string &Fn) {
-    assert(!Fn.empty() && "Empty predicate string!");
+  void addPredicateFn(const TreePredicateFn &Fn) {
+    assert(!Fn.isAlwaysTrue() && "Empty predicate string!");
     if (std::find(PredicateFns.begin(), PredicateFns.end(), Fn) ==
           PredicateFns.end())
       PredicateFns.push_back(Fn);
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index f37d3ea..5b0aedf 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -288,6 +288,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
   isIndirectBranch = R->getValueAsBit("isIndirectBranch");
   isCompare    = R->getValueAsBit("isCompare");
   isMoveImm    = R->getValueAsBit("isMoveImm");
+  isBitcast    = R->getValueAsBit("isBitcast");
   isBarrier    = R->getValueAsBit("isBarrier");
   isCall       = R->getValueAsBit("isCall");
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 58913b9..5f1e0be 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -26,7 +26,7 @@ namespace llvm {
   class DagInit;
   class CodeGenTarget;
   class StringRef;
-  
+
   class CGIOperandList {
   public:
     class ConstraintInfo {
@@ -34,25 +34,25 @@ namespace llvm {
       unsigned OtherTiedOperand;
     public:
       ConstraintInfo() : Kind(None) {}
-      
+
       static ConstraintInfo getEarlyClobber() {
         ConstraintInfo I;
         I.Kind = EarlyClobber;
         I.OtherTiedOperand = 0;
         return I;
       }
-      
+
       static ConstraintInfo getTied(unsigned Op) {
         ConstraintInfo I;
         I.Kind = Tied;
         I.OtherTiedOperand = Op;
         return I;
       }
-      
+
       bool isNone() const { return Kind == None; }
       bool isEarlyClobber() const { return Kind == EarlyClobber; }
       bool isTied() const { return Kind == Tied; }
-      
+
       unsigned getTiedOperand() const {
         assert(isTied());
         return OtherTiedOperand;
@@ -65,19 +65,19 @@ namespace llvm {
       /// Rec - The definition this operand is declared as.
       ///
       Record *Rec;
-      
+
       /// Name - If this operand was assigned a symbolic name, this is it,
       /// otherwise, it's empty.
       std::string Name;
-      
+
       /// PrinterMethodName - The method used to print operands of this type in
       /// the asmprinter.
       std::string PrinterMethodName;
-      
+
       /// EncoderMethodName - The method used to get the machine operand value
       /// for binary encoding. "getMachineOpValue" by default.
       std::string EncoderMethodName;
-      
+
       /// MIOperandNo - Currently (this is meant to be phased out), some logical
       /// operands correspond to multiple MachineInstr operands.  In the X86
       /// target for example, one address operand is represented as 4
@@ -86,27 +86,27 @@ namespace llvm {
       /// does, this contains the MI operand index of this operand.
       unsigned MIOperandNo;
       unsigned MINumOperands;   // The number of operands.
-      
+
       /// DoNotEncode - Bools are set to true in this vector for each operand in
       /// the DisableEncoding list.  These should not be emitted by the code
       /// emitter.
       std::vector<bool> DoNotEncode;
-      
+
       /// MIOperandInfo - Default MI operand type. Note an operand may be made
       /// up of multiple MI operands.
       DagInit *MIOperandInfo;
-      
+
       /// Constraint info for this operand.  This operand can have pieces, so we
       /// track constraint info for each.
       std::vector<ConstraintInfo> Constraints;
-      
+
       OperandInfo(Record *R, const std::string &N, const std::string &PMN,
                   const std::string &EMN, unsigned MION, unsigned MINO,
                   DagInit *MIOI)
       : Rec(R), Name(N), PrinterMethodName(PMN), EncoderMethodName(EMN),
         MIOperandNo(MION), MINumOperands(MINO), MIOperandInfo(MIOI) {}
-      
-      
+
+
       /// getTiedOperand - If this operand is tied to another one, return the
       /// other operand number.  Otherwise, return -1.
       int getTiedRegister() const {
@@ -117,43 +117,44 @@ namespace llvm {
         return -1;
       }
     };
-    
+
     CGIOperandList(Record *D);
-    
+
     Record *TheDef;            // The actual record containing this OperandList.
 
     /// NumDefs - Number of def operands declared, this is the number of
     /// elements in the instruction's (outs) list.
     ///
     unsigned NumDefs;
-    
+
     /// OperandList - The list of declared operands, along with their declared
     /// type (which is a record).
     std::vector<OperandInfo> OperandList;
-    
+
     // Information gleaned from the operand list.
     bool isPredicable;
     bool hasOptionalDef;
     bool isVariadic;
-    
+
     // Provide transparent accessors to the operand list.
+    bool empty() const { return OperandList.empty(); }
     unsigned size() const { return OperandList.size(); }
     const OperandInfo &operator[](unsigned i) const { return OperandList[i]; }
     OperandInfo &operator[](unsigned i) { return OperandList[i]; }
     OperandInfo &back() { return OperandList.back(); }
     const OperandInfo &back() const { return OperandList.back(); }
-    
-    
+
+
     /// getOperandNamed - Return the index of the operand with the specified
     /// non-empty name.  If the instruction does not have an operand with the
     /// specified name, throw an exception.
     unsigned getOperandNamed(StringRef Name) const;
-    
+
     /// hasOperandNamed - Query whether the instruction has an operand of the
     /// given name. If so, return true and set OpIdx to the index of the
     /// operand. Otherwise, return false.
     bool hasOperandNamed(StringRef Name, unsigned &OpIdx) const;
-      
+
     /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar",
     /// where $foo is a whole operand and $foo.bar refers to a suboperand.
     /// This throws an exception if the name is invalid.  If AllowWholeOp is
@@ -161,13 +162,13 @@ namespace llvm {
     /// not.
     std::pair<unsigned,unsigned> ParseOperandName(const std::string &Op,
                                                   bool AllowWholeOp = true);
-    
+
     /// getFlattenedOperandNumber - Flatten a operand/suboperand pair into a
     /// flat machineinstr operand #.
     unsigned getFlattenedOperandNumber(std::pair<unsigned,unsigned> Op) const {
       return OperandList[Op.first].MIOperandNo + Op.second;
     }
-    
+
     /// getSubOperandNumber - Unflatten a operand number into an
     /// operand/suboperand pair.
     std::pair<unsigned,unsigned> getSubOperandNumber(unsigned Op) const {
@@ -177,8 +178,8 @@ namespace llvm {
           return std::make_pair(i, Op-OperandList[i].MIOperandNo);
       }
     }
-    
-    
+
+
     /// isFlatOperandNotEmitted - Return true if the specified flat operand #
     /// should not be emitted with the code emitter.
     bool isFlatOperandNotEmitted(unsigned FlatOpNo) const {
@@ -187,10 +188,10 @@ namespace llvm {
         return OperandList[Op.first].DoNotEncode[Op.second];
       return false;
     }
-    
+
     void ProcessDisableEncoding(std::string Value);
   };
-  
+
 
   class CodeGenInstruction {
   public:
@@ -215,6 +216,7 @@ namespace llvm {
     bool isIndirectBranch;
     bool isCompare;
     bool isMoveImm;
+    bool isBitcast;
     bool isBarrier;
     bool isCall;
     bool canFoldAsLoad;
@@ -242,45 +244,45 @@ namespace llvm {
     /// MVT::Other.
     MVT::SimpleValueType
       HasOneImplicitDefWithKnownVT(const CodeGenTarget &TargetInfo) const;
-    
-    
+
+
     /// FlattenAsmStringVariants - Flatten the specified AsmString to only
     /// include text from the specified variant, returning the new string.
     static std::string FlattenAsmStringVariants(StringRef AsmString,
                                                 unsigned Variant);
   };
-  
-  
+
+
   /// CodeGenInstAlias - This represents an InstAlias definition.
   class CodeGenInstAlias {
   public:
     Record *TheDef;            // The actual record defining this InstAlias.
-    
+
     /// AsmString - The format string used to emit a .s file for the
     /// instruction.
     std::string AsmString;
-    
+
     /// Result - The result instruction.
     DagInit *Result;
-    
+
     /// ResultInst - The instruction generated by the alias (decoded from
     /// Result).
     CodeGenInstruction *ResultInst;
-    
-    
+
+
     struct ResultOperand {
     private:
       StringRef Name;
       Record *R;
-      
+
       int64_t Imm;
-    public:      
+    public:
       enum {
         K_Record,
         K_Imm,
         K_Reg
       } Kind;
-      
+
       ResultOperand(StringRef N, Record *r) : Name(N), R(r), Kind(K_Record) {}
       ResultOperand(int64_t I) : Imm(I), Kind(K_Imm) {}
       ResultOperand(Record *r) : R(r), Kind(K_Reg) {}
@@ -288,13 +290,13 @@ namespace llvm {
       bool isRecord() const { return Kind == K_Record; }
       bool isImm() const { return Kind == K_Imm; }
       bool isReg() const { return Kind == K_Reg; }
-      
+
       StringRef getName() const { assert(isRecord()); return Name; }
       Record *getRecord() const { assert(isRecord()); return R; }
       int64_t getImm() const { assert(isImm()); return Imm; }
       Record *getRegister() const { assert(isReg()); return R; }
     };
-    
+
     /// ResultOperands - The decoded operands for the result instruction.
     std::vector<ResultOperand> ResultOperands;
 
@@ -304,13 +306,13 @@ namespace llvm {
     /// index specifies the suboperand.  If there are no suboperands or if all
     /// of them are matched by the operand, the second value should be -1.
     std::vector<std::pair<unsigned, int> > ResultInstOperandIndex;
-    
+
     CodeGenInstAlias(Record *R, CodeGenTarget &T);
 
     bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
                          Record *InstOpRec, bool hasSubOps, SMLoc Loc,
                          CodeGenTarget &T, ResultOperand &ResOp);
-  };    
+  };
 }
 
 #endif
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index bbd0cef..39b92c5 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -29,7 +29,8 @@ namespace llvm {
   struct CodeGenRegister {
     Record *TheDef;
     const std::string &getName() const;
-    unsigned DeclaredSpillSize, DeclaredSpillAlignment;
+    unsigned EnumValue;
+    unsigned CostPerUse;
     CodeGenRegister(Record *R);
   };
 
@@ -49,23 +50,23 @@ namespace llvm {
     const std::string &getName() const;
     const std::vector<MVT::SimpleValueType> &getValueTypes() const {return VTs;}
     unsigned getNumValueTypes() const { return VTs.size(); }
-    
+
     MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const {
       if (VTNum < VTs.size())
         return VTs[VTNum];
       assert(0 && "VTNum greater than number of ValueTypes in RegClass!");
       abort();
     }
-    
+
     bool containsRegister(Record *R) const {
       for (unsigned i = 0, e = Elements.size(); i != e; ++i)
         if (Elements[i] == R) return true;
       return false;
     }
-    
+
     // Returns true if RC is a strict subclass.
     // RC is a sub-class of this class if it is a valid replacement for any
-    // instruction operand where a register of this classis required. It must 
+    // instruction operand where a register of this classis required. It must
     // satisfy these conditions:
     //
     // 1. All RC registers are also in this.
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index d0f7d8b..57f7fdc 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -164,11 +164,13 @@ void CodeGenTarget::ReadRegisters() const {
 
   Registers.reserve(Regs.size());
   Registers.assign(Regs.begin(), Regs.end());
+  // Assign the enumeration values.
+  for (unsigned i = 0, e = Registers.size(); i != e; ++i)
+    Registers[i].EnumValue = i + 1;
 }
 
 CodeGenRegister::CodeGenRegister(Record *R) : TheDef(R) {
-  DeclaredSpillSize = R->getValueAsInt("SpillSize");
-  DeclaredSpillAlignment = R->getValueAsInt("SpillAlignment");
+  CostPerUse = R->getValueAsInt("CostPerUse");
 }
 
 const std::string &CodeGenRegister::getName() const {
@@ -199,7 +201,7 @@ const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
     if (Reg.TheDef->getValueAsString("AsmName") == Name)
       return &Reg;
   }
-  
+
   return 0;
 }
 
@@ -216,7 +218,7 @@ getRegisterVTs(Record *R) const {
       }
     }
   }
-  
+
   // Remove duplicates.
   array_pod_sort(Result.begin(), Result.end());
   Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
@@ -229,8 +231,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
   if (R->getName().size() > 9 && R->getName()[9] == '.') {
     static unsigned AnonCounter = 0;
     R->setName("AnonRegClass_"+utostr(AnonCounter++));
-  } 
-  
+  }
+
   std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
   for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
     Record *Type = TypeList[i];
@@ -240,7 +242,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
     VTs.push_back(getValueType(Type));
   }
   assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
-  
+
   std::vector<Record*> RegList = R->getValueAsListOfDefs("MemberList");
   for (unsigned i = 0, e = RegList.size(); i != e; ++i) {
     Record *Reg = RegList[i];
@@ -293,7 +295,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
   for (unsigned i = 0, e = RCs.size(); i != e; ++i)
     for (unsigned ri = 0, re = RCs[i].VTs.size(); ri != re; ++ri)
       LegalValueTypes.push_back(RCs[i].VTs[ri]);
-  
+
   // Remove duplicates.
   std::sort(LegalValueTypes.begin(), LegalValueTypes.end());
   LegalValueTypes.erase(std::unique(LegalValueTypes.begin(),
@@ -314,10 +316,10 @@ void CodeGenTarget::ReadInstructions() const {
 
 static const CodeGenInstruction *
 GetInstByName(const char *Name,
-              const DenseMap<const Record*, CodeGenInstruction*> &Insts, 
+              const DenseMap<const Record*, CodeGenInstruction*> &Insts,
               RecordKeeper &Records) {
   const Record *Rec = Records.getDef(Name);
-  
+
   DenseMap<const Record*, CodeGenInstruction*>::const_iterator
     I = Insts.find(Rec);
   if (Rec == 0 || I == Insts.end())
@@ -434,7 +436,7 @@ ComplexPattern::ComplexPattern(Record *R) {
 std::vector<CodeGenIntrinsic> llvm::LoadIntrinsics(const RecordKeeper &RC,
                                                    bool TargetOnly) {
   std::vector<Record*> I = RC.getAllDerivedDefinitions("Intrinsic");
-  
+
   std::vector<CodeGenIntrinsic> Result;
 
   for (unsigned i = 0, e = I.size(); i != e; ++i) {
@@ -451,8 +453,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   ModRef = ReadWriteMem;
   isOverloaded = false;
   isCommutative = false;
-  
-  if (DefName.size() <= 4 || 
+
+  if (DefName.size() <= 4 ||
       std::string(DefName.begin(), DefName.begin() + 4) != "int_")
     throw "Intrinsic '" + DefName + "' does not start with 'int_'!";
 
@@ -472,11 +474,11 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       Name += (EnumName[i] == '_') ? '.' : EnumName[i];
   } else {
     // Verify it starts with "llvm.".
-    if (Name.size() <= 5 || 
+    if (Name.size() <= 5 ||
         std::string(Name.begin(), Name.begin() + 5) != "llvm.")
       throw "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!";
   }
-  
+
   // If TargetPrefix is specified, make sure that Name starts with
   // "llvm.<targetprefix>.".
   if (!TargetPrefix.empty()) {
@@ -486,7 +488,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       throw "Intrinsic '" + DefName + "' does not start with 'llvm." +
         TargetPrefix + ".'!";
   }
-  
+
   // Parse the list of return types.
   std::vector<MVT::SimpleValueType> OverloadedVTs;
   ListInit *TypeList = R->getValueAsListInit("RetTypes");
@@ -517,11 +519,11 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     // Reject invalid types.
     if (VT == MVT::isVoid)
       throw "Intrinsic '" + DefName + " has void in result type list!";
-    
+
     IS.RetVTs.push_back(VT);
     IS.RetTypeDefs.push_back(TyEl);
   }
-  
+
   // Parse the list of parameter types.
   TypeList = R->getValueAsListInit("ParamTypes");
   for (unsigned i = 0, e = TypeList->getSize(); i != e; ++i) {
@@ -542,16 +544,16 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
              "Expected iAny or vAny type");
     } else
       VT = getValueType(TyEl->getValueAsDef("VT"));
-    
+
     if (EVT(VT).isOverloaded()) {
       OverloadedVTs.push_back(VT);
       isOverloaded = true;
     }
-    
+
     // Reject invalid types.
     if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/)
       throw "Intrinsic '" + DefName + " has void in result type list!";
-    
+
     IS.ParamVTs.push_back(VT);
     IS.ParamTypeDefs.push_back(TyEl);
   }
@@ -562,7 +564,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     Record *Property = PropList->getElementAsRecord(i);
     assert(Property->isSubClassOf("IntrinsicProperty") &&
            "Expected a property!");
-    
+
     if (Property->getName() == "IntrNoMem")
       ModRef = NoMem;
     else if (Property->getName() == "IntrReadArgMem")
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index f1058eb..4e04154 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -32,8 +32,8 @@ class CodeGenTarget;
 //  SDNPMemOperand: indicates that a node touches memory and therefore must
 //                  have an associated memory operand that describes the access.
 enum SDNP {
-  SDNPCommutative, 
-  SDNPAssociative, 
+  SDNPCommutative,
+  SDNPAssociative,
   SDNPHasChain,
   SDNPOutGlue,
   SDNPInGlue,
@@ -57,7 +57,7 @@ std::string getEnumName(MVT::SimpleValueType T);
 /// getQualifiedName - Return the name of the specified record, with a
 /// namespace qualifier if the record contains one.
 std::string getQualifiedName(const Record *R);
-  
+
 /// CodeGenTarget - This class corresponds to the Target class in the .td files.
 ///
 class CodeGenTarget {
@@ -74,7 +74,7 @@ class CodeGenTarget {
   void ReadRegisterClasses() const;
   void ReadInstructions() const;
   void ReadLegalValueTypes() const;
-  
+
   mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
 public:
   CodeGenTarget(RecordKeeper &Records);
@@ -102,7 +102,7 @@ public:
     if (Registers.empty()) ReadRegisters();
     return Registers;
   }
-  
+
   /// getRegisterByName - If there is a register with the specific AsmName,
   /// return it.
   const CodeGenRegister *getRegisterByName(StringRef Name) const;
@@ -134,7 +134,7 @@ public:
     assert(0 && "Didn't find the register class");
     abort();
   }
-  
+
   /// getRegisterClassForRegister - Find the register class that contains the
   /// specified physical register.  If the register is not in a register
   /// class, return null. If the register is in multiple classes, and the
@@ -192,19 +192,19 @@ public:
   /// getRegisterVTs - Find the union of all possible SimpleValueTypes for the
   /// specified physical register.
   std::vector<MVT::SimpleValueType> getRegisterVTs(Record *R) const;
-  
+
   const std::vector<MVT::SimpleValueType> &getLegalValueTypes() const {
     if (LegalValueTypes.empty()) ReadLegalValueTypes();
     return LegalValueTypes;
   }
-  
+
   /// isLegalValueType - Return true if the specified value type is natively
   /// supported by the target (i.e. there are registers that directly hold it).
   bool isLegalValueType(MVT::SimpleValueType VT) const {
     const std::vector<MVT::SimpleValueType> &LegalVTs = getLegalValueTypes();
     for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i)
       if (LegalVTs[i] == VT) return true;
-    return false;    
+    return false;
   }
 
 private:
@@ -213,7 +213,7 @@ private:
     return Instructions;
   }
 public:
-  
+
   CodeGenInstruction &getInstruction(const Record *InstRec) const {
     if (Instructions.empty()) ReadInstructions();
     DenseMap<const Record*, CodeGenInstruction*>::iterator I =
@@ -233,12 +233,12 @@ public:
   typedef std::vector<const CodeGenInstruction*>::const_iterator inst_iterator;
   inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();}
   inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); }
-  
-  
+
+
   /// isLittleEndianEncoding - are instruction bit patterns defined as  [0..n]?
   ///
   bool isLittleEndianEncoding() const;
-  
+
 private:
   void ComputeInstrsByEnum() const;
 };
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 8a73404..d66ae96 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
 static unsigned getResultPatternCost(TreePatternNode *P,
                                      CodeGenDAGPatterns &CGP) {
   if (P->isLeaf()) return 0;
-  
+
   unsigned Cost = 0;
   Record *Op = P->getOperator();
   if (Op->isSubClassOf("Instruction")) {
@@ -43,7 +43,7 @@ static unsigned getResultPatternCost(TreePatternNode *P,
 
 /// getResultPatternCodeSize - Compute the code size of instructions for this
 /// pattern.
-static unsigned getResultPatternSize(TreePatternNode *P, 
+static unsigned getResultPatternSize(TreePatternNode *P,
                                      CodeGenDAGPatterns &CGP) {
   if (P->isLeaf()) return 0;
 
@@ -64,21 +64,21 @@ namespace {
 struct PatternSortingPredicate {
   PatternSortingPredicate(CodeGenDAGPatterns &cgp) : CGP(cgp) {}
   CodeGenDAGPatterns &CGP;
-  
+
   bool operator()(const PatternToMatch *LHS, const PatternToMatch *RHS) {
     const TreePatternNode *LHSSrc = LHS->getSrcPattern();
     const TreePatternNode *RHSSrc = RHS->getSrcPattern();
-    
+
     if (LHSSrc->getNumTypes() != 0 && RHSSrc->getNumTypes() != 0 &&
         LHSSrc->getType(0) != RHSSrc->getType(0)) {
       MVT::SimpleValueType V1 = LHSSrc->getType(0), V2 = RHSSrc->getType(0);
       if (MVT(V1).isVector() != MVT(V2).isVector())
         return MVT(V2).isVector();
-      
+
       if (MVT(V1).isFloatingPoint() != MVT(V2).isFloatingPoint())
         return MVT(V2).isFloatingPoint();
     }
-    
+
     // Otherwise, if the patterns might both match, sort based on complexity,
     // which means that we prefer to match patterns that cover more nodes in the
     // input over nodes that cover fewer.
@@ -86,18 +86,18 @@ struct PatternSortingPredicate {
     unsigned RHSSize = RHS->getPatternComplexity(CGP);
     if (LHSSize > RHSSize) return true;   // LHS -> bigger -> less cost
     if (LHSSize < RHSSize) return false;
-    
+
     // If the patterns have equal complexity, compare generated instruction cost
     unsigned LHSCost = getResultPatternCost(LHS->getDstPattern(), CGP);
     unsigned RHSCost = getResultPatternCost(RHS->getDstPattern(), CGP);
     if (LHSCost < RHSCost) return true;
     if (LHSCost > RHSCost) return false;
-    
+
     unsigned LHSPatSize = getResultPatternSize(LHS->getDstPattern(), CGP);
     unsigned RHSPatSize = getResultPatternSize(RHS->getDstPattern(), CGP);
     if (LHSPatSize < RHSPatSize) return true;
     if (LHSPatSize > RHSPatSize) return false;
-    
+
     // Sort based on the UID of the pattern, giving us a deterministic ordering
     // if all other sorting conditions fail.
     assert(LHS == RHS || LHS->ID != RHS->ID);
@@ -110,7 +110,7 @@ struct PatternSortingPredicate {
 void DAGISelEmitter::run(raw_ostream &OS) {
   EmitSourceFileHeader("DAG Instruction Selector for the " +
                        CGP.getTargetInfo().getName() + " target", OS);
-  
+
   OS << "// *** NOTE: This file is #included into the middle of the target\n"
      << "// *** instruction selector class.  These functions are really "
      << "methods.\n\n";
@@ -132,8 +132,8 @@ void DAGISelEmitter::run(raw_ostream &OS) {
   // We want to process the matches in order of minimal cost.  Sort the patterns
   // so the least cost one is at the start.
   std::sort(Patterns.begin(), Patterns.end(), PatternSortingPredicate(CGP));
-  
-  
+
+
   // Convert each variant of each pattern into a Matcher.
   std::vector<Matcher*> PatternMatchers;
   for (unsigned i = 0, e = Patterns.size(); i != e; ++i) {
@@ -144,7 +144,7 @@ void DAGISelEmitter::run(raw_ostream &OS) {
         break;
     }
   }
-          
+
   Matcher *TheMatcher = new ScopeMatcher(&PatternMatchers[0],
                                          PatternMatchers.size());
 
diff --git a/utils/TableGen/DAGISelEmitter.h b/utils/TableGen/DAGISelEmitter.h
index 2117e65..35ab550 100644
--- a/utils/TableGen/DAGISelEmitter.h
+++ b/utils/TableGen/DAGISelEmitter.h
@@ -16,7 +16,6 @@
 
 #include "TableGenBackend.h"
 #include "CodeGenDAGPatterns.h"
-#include <set>
 
 namespace llvm {
 
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index 2afa2b9..b12e101 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -83,6 +83,15 @@ ScopeMatcher::~ScopeMatcher() {
 }
 
 
+CheckPredicateMatcher::CheckPredicateMatcher(const TreePredicateFn &pred)
+  : Matcher(CheckPredicate), Pred(pred.getOrigPatFragRecord()) {}
+
+TreePredicateFn CheckPredicateMatcher::getPredicate() const {
+  return TreePredicateFn(Pred);
+}
+
+
+
 // printImpl methods.
 
 void ScopeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
@@ -129,7 +138,7 @@ printImpl(raw_ostream &OS, unsigned indent) const {
 }
 
 void CheckPredicateMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
-  OS.indent(indent) << "CheckPredicate " << PredName << '\n';
+  OS.indent(indent) << "CheckPredicate " << getPredicate().getFnName() << '\n';
 }
 
 void CheckOpcodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
@@ -263,7 +272,7 @@ unsigned CheckPatternPredicateMatcher::getHashImpl() const {
 }
 
 unsigned CheckPredicateMatcher::getHashImpl() const {
-  return HashString(PredName);
+  return HashString(getPredicate().getFnName());
 }
 
 unsigned CheckOpcodeMatcher::getHashImpl() const {
@@ -301,7 +310,6 @@ bool CheckOpcodeMatcher::isEqualImpl(const Matcher *M) const {
           Opcode.getEnumName();
 }
 
-
 bool EmitNodeMatcherCommon::isEqualImpl(const Matcher *m) const {
   const EmitNodeMatcherCommon *M = cast<EmitNodeMatcherCommon>(m);
   return M->OpcodeName == OpcodeName && M->VTs == VTs &&
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 8e6e446..dcb8da7 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -17,6 +17,7 @@
 #include "llvm/Support/Casting.h"
 
 namespace llvm {
+  struct CodeGenRegister;
   class CodeGenDAGPatterns;
   class Matcher;
   class PatternToMatch;
@@ -24,6 +25,8 @@ namespace llvm {
   class ComplexPattern;
   class Record;
   class SDNodeInfo;
+  class TreePredicateFn;
+  class TreePattern;
 
 Matcher *ConvertPatternToMatcher(const PatternToMatch &Pattern,unsigned Variant,
                                  const CodeGenDAGPatterns &CGP);
@@ -418,12 +421,11 @@ private:
 /// CheckPredicateMatcher - This checks the target-specific predicate to
 /// see if the node is acceptable.
 class CheckPredicateMatcher : public Matcher {
-  StringRef PredName;
+  TreePattern *Pred;
 public:
-  CheckPredicateMatcher(StringRef predname)
-    : Matcher(CheckPredicate), PredName(predname) {}
+  CheckPredicateMatcher(const TreePredicateFn &pred);
 
-  StringRef getPredicateName() const { return PredName; }
+  TreePredicateFn getPredicate() const;
 
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckPredicate;
@@ -435,7 +437,7 @@ public:
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
-    return cast<CheckPredicateMatcher>(M)->PredName == PredName;
+    return cast<CheckPredicateMatcher>(M)->Pred == Pred;
   }
   virtual unsigned getHashImpl() const;
 };
@@ -816,13 +818,13 @@ private:
 class EmitRegisterMatcher : public Matcher {
   /// Reg - The def for the register that we're emitting.  If this is null, then
   /// this is a reference to zero_reg.
-  Record *Reg;
+  const CodeGenRegister *Reg;
   MVT::SimpleValueType VT;
 public:
-  EmitRegisterMatcher(Record *reg, MVT::SimpleValueType vt)
+  EmitRegisterMatcher(const CodeGenRegister *reg, MVT::SimpleValueType vt)
     : Matcher(EmitRegister), Reg(reg), VT(vt) {}
 
-  Record *getReg() const { return Reg; }
+  const CodeGenRegister *getReg() const { return Reg; }
   MVT::SimpleValueType getVT() const { return VT; }
 
   static inline bool classof(const Matcher *N) {
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 0b7fbf7..acb0135 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -33,8 +33,12 @@ OmitComments("omit-comments", cl::desc("Do not generate comments"),
 namespace {
 class MatcherTableEmitter {
   const CodeGenDAGPatterns &CGP;
-  StringMap<unsigned> NodePredicateMap, PatternPredicateMap;
-  std::vector<std::string> NodePredicates, PatternPredicates;
+  
+  DenseMap<TreePattern *, unsigned> NodePredicateMap;
+  std::vector<TreePredicateFn> NodePredicates;
+  
+  StringMap<unsigned> PatternPredicateMap;
+  std::vector<std::string> PatternPredicates;
 
   DenseMap<const ComplexPattern*, unsigned> ComplexPatternMap;
   std::vector<const ComplexPattern*> ComplexPatterns;
@@ -44,26 +48,28 @@ class MatcherTableEmitter {
   std::vector<Record*> NodeXForms;
 
 public:
-  MatcherTableEmitter(const CodeGenDAGPatterns &cgp) : CGP(cgp) {}
+  MatcherTableEmitter(const CodeGenDAGPatterns &cgp)
+    : CGP(cgp) {}
 
   unsigned EmitMatcherList(const Matcher *N, unsigned Indent,
                            unsigned StartIdx, formatted_raw_ostream &OS);
-  
+
   void EmitPredicateFunctions(formatted_raw_ostream &OS);
-  
+
   void EmitHistogram(const Matcher *N, formatted_raw_ostream &OS);
 private:
   unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
                        formatted_raw_ostream &OS);
-  
-  unsigned getNodePredicate(StringRef PredName) {
-    unsigned &Entry = NodePredicateMap[PredName];
+
+  unsigned getNodePredicate(TreePredicateFn Pred) {
+    unsigned &Entry = NodePredicateMap[Pred.getOrigPatFragRecord()];
     if (Entry == 0) {
-      NodePredicates.push_back(PredName.str());
+      NodePredicates.push_back(Pred);
       Entry = NodePredicates.size();
     }
     return Entry-1;
   }
+  
   unsigned getPatternPredicate(StringRef PredName) {
     unsigned &Entry = PatternPredicateMap[PredName];
     if (Entry == 0) {
@@ -72,7 +78,6 @@ private:
     }
     return Entry-1;
   }
-  
   unsigned getComplexPat(const ComplexPattern &P) {
     unsigned &Entry = ComplexPatternMap[&P];
     if (Entry == 0) {
@@ -81,7 +86,7 @@ private:
     }
     return Entry-1;
   }
-  
+
   unsigned getNodeXFormID(Record *Rec) {
     unsigned &Entry = NodeXFormMap[Rec];
     if (Entry == 0) {
@@ -90,13 +95,13 @@ private:
     }
     return Entry-1;
   }
-  
+
 };
 } // end anonymous namespace.
 
 static unsigned GetVBRSize(unsigned Val) {
   if (Val <= 127) return 1;
-  
+
   unsigned NumBytes = 0;
   while (Val >= 128) {
     Val >>= 7;
@@ -112,7 +117,7 @@ static uint64_t EmitVBRValue(uint64_t Val, raw_ostream &OS) {
     OS << Val << ", ";
     return 1;
   }
-  
+
   uint64_t InVal = Val;
   unsigned NumBytes = 0;
   while (Val >= 128) {
@@ -133,14 +138,14 @@ unsigned MatcherTableEmitter::
 EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
             formatted_raw_ostream &OS) {
   OS.PadToColumn(Indent*2);
-  
+
   switch (N->getKind()) {
   case Matcher::Scope: {
     const ScopeMatcher *SM = cast<ScopeMatcher>(N);
     assert(SM->getNext() == 0 && "Shouldn't have next after scope");
-    
+
     unsigned StartIdx = CurrentIdx;
-    
+
     // Emit all of the children.
     for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i) {
       if (i == 0) {
@@ -164,29 +169,29 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
       unsigned VBRSize = 0;
       do {
         VBRSize = GetVBRSize(ChildSize);
-        
+
         TmpBuf.clear();
         raw_svector_ostream OS(TmpBuf);
         formatted_raw_ostream FOS(OS);
         ChildSize = EmitMatcherList(SM->getChild(i), Indent+1,
                                     CurrentIdx+VBRSize, FOS);
       } while (GetVBRSize(ChildSize) != VBRSize);
-      
+
       assert(ChildSize != 0 && "Should not have a zero-sized child!");
-    
+
       CurrentIdx += EmitVBRValue(ChildSize, OS);
       if (!OmitComments) {
         OS << "/*->" << CurrentIdx+ChildSize << "*/";
-      
+
         if (i == 0)
           OS.PadToColumn(CommentIndent) << "// " << SM->getNumChildren()
             << " children in Scope";
       }
-      
+
       OS << '\n' << TmpBuf.str();
       CurrentIdx += ChildSize;
     }
-    
+
     // Emit a zero as a sentinel indicating end of 'Scope'.
     if (!OmitComments)
       OS << "/*" << CurrentIdx << "*/";
@@ -196,7 +201,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << '\n';
     return CurrentIdx - StartIdx + 1;
   }
-      
+
   case Matcher::RecordNode:
     OS << "OPC_RecordNode,";
     if (!OmitComments)
@@ -215,30 +220,30 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
         << cast<RecordChildMatcher>(N)->getWhatFor();
     OS << '\n';
     return 1;
-      
+
   case Matcher::RecordMemRef:
     OS << "OPC_RecordMemRef,\n";
     return 1;
-      
+
   case Matcher::CaptureGlueInput:
     OS << "OPC_CaptureGlueInput,\n";
     return 1;
-      
+
   case Matcher::MoveChild:
     OS << "OPC_MoveChild, " << cast<MoveChildMatcher>(N)->getChildNo() << ",\n";
     return 2;
-      
+
   case Matcher::MoveParent:
     OS << "OPC_MoveParent,\n";
     return 1;
-      
+
   case Matcher::CheckSame:
     OS << "OPC_CheckSame, "
        << cast<CheckSameMatcher>(N)->getMatchNumber() << ",\n";
     return 2;
 
   case Matcher::CheckPatternPredicate: {
-    StringRef Pred = cast<CheckPatternPredicateMatcher>(N)->getPredicate();
+    StringRef Pred =cast<CheckPatternPredicateMatcher>(N)->getPredicate();
     OS << "OPC_CheckPatternPredicate, " << getPatternPredicate(Pred) << ',';
     if (!OmitComments)
       OS.PadToColumn(CommentIndent) << "// " << Pred;
@@ -246,23 +251,23 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     return 2;
   }
   case Matcher::CheckPredicate: {
-    StringRef Pred = cast<CheckPredicateMatcher>(N)->getPredicateName();
+    TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate();
     OS << "OPC_CheckPredicate, " << getNodePredicate(Pred) << ',';
     if (!OmitComments)
-      OS.PadToColumn(CommentIndent) << "// " << Pred;
+      OS.PadToColumn(CommentIndent) << "// " << Pred.getFnName();
     OS << '\n';
     return 2;
   }
 
   case Matcher::CheckOpcode:
-    OS << "OPC_CheckOpcode, TARGET_OPCODE("
+    OS << "OPC_CheckOpcode, TARGET_VAL("
        << cast<CheckOpcodeMatcher>(N)->getOpcode().getEnumName() << "),\n";
     return 3;
-      
+
   case Matcher::SwitchOpcode:
   case Matcher::SwitchType: {
     unsigned StartIdx = CurrentIdx;
-    
+
     unsigned NumCases;
     if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N)) {
       OS << "OPC_SwitchOpcode ";
@@ -276,7 +281,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
       OS << "/*" << NumCases << " cases */";
     OS << ", ";
     ++CurrentIdx;
-    
+
     // For each case we emit the size, then the opcode, then the matcher.
     for (unsigned i = 0, e = NumCases; i != e; ++i) {
       const Matcher *Child;
@@ -288,7 +293,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
         Child = cast<SwitchTypeMatcher>(N)->getCaseMatcher(i);
         IdxSize = 1;  // size of type in table is 1 byte.
       }
-      
+
       // We need to encode the opcode and the offset of the case code before
       // emitting the case code.  Handle this by buffering the output into a
       // string while we get the size.  Unfortunately, the offset of the
@@ -299,29 +304,29 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
       unsigned VBRSize = 0;
       do {
         VBRSize = GetVBRSize(ChildSize);
-        
+
         TmpBuf.clear();
         raw_svector_ostream OS(TmpBuf);
         formatted_raw_ostream FOS(OS);
         ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx+VBRSize+IdxSize,
                                     FOS);
       } while (GetVBRSize(ChildSize) != VBRSize);
-      
+
       assert(ChildSize != 0 && "Should not have a zero-sized child!");
-      
+
       if (i != 0) {
         OS.PadToColumn(Indent*2);
         if (!OmitComments)
         OS << (isa<SwitchOpcodeMatcher>(N) ?
                    "/*SwitchOpcode*/ " : "/*SwitchType*/ ");
       }
-      
+
       // Emit the VBR.
       CurrentIdx += EmitVBRValue(ChildSize, OS);
-      
+
       OS << ' ';
       if (const SwitchOpcodeMatcher *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
-        OS << "TARGET_OPCODE(" << SOM->getCaseOpcode(i).getEnumName() << "),";
+        OS << "TARGET_VAL(" << SOM->getCaseOpcode(i).getEnumName() << "),";
       else
         OS << getEnumName(cast<SwitchTypeMatcher>(N)->getCaseType(i)) << ',';
 
@@ -351,13 +356,13 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << "OPC_CheckType, "
        << getEnumName(cast<CheckTypeMatcher>(N)->getType()) << ",\n";
     return 2;
-      
+
   case Matcher::CheckChildType:
     OS << "OPC_CheckChild"
        << cast<CheckChildTypeMatcher>(N)->getChildNo() << "Type, "
        << getEnumName(cast<CheckChildTypeMatcher>(N)->getType()) << ",\n";
     return 2;
-      
+
   case Matcher::CheckInteger: {
     OS << "OPC_CheckInteger, ";
     unsigned Bytes=1+EmitVBRValue(cast<CheckIntegerMatcher>(N)->getValue(), OS);
@@ -368,7 +373,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << "OPC_CheckCondCode, ISD::"
        << cast<CheckCondCodeMatcher>(N)->getCondCodeName() << ",\n";
     return 2;
-      
+
   case Matcher::CheckValueType:
     OS << "OPC_CheckValueType, MVT::"
        << cast<CheckValueTypeMatcher>(N)->getTypeName() << ",\n";
@@ -379,20 +384,20 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     const ComplexPattern &Pattern = CCPM->getPattern();
     OS << "OPC_CheckComplexPat, /*CP*/" << getComplexPat(Pattern) << ", /*#*/"
        << CCPM->getMatchNumber() << ',';
-    
+
     if (!OmitComments) {
       OS.PadToColumn(CommentIndent) << "// " << Pattern.getSelectFunc();
       OS << ":$" << CCPM->getName();
       for (unsigned i = 0, e = Pattern.getNumOperands(); i != e; ++i)
         OS << " #" << CCPM->getFirstResult()+i;
-           
+
       if (Pattern.hasProperty(SDNPHasChain))
         OS << " + chain result";
     }
     OS << '\n';
     return 3;
   }
-      
+
   case Matcher::CheckAndImm: {
     OS << "OPC_CheckAndImm, ";
     unsigned Bytes=1+EmitVBRValue(cast<CheckAndImmMatcher>(N)->getValue(), OS);
@@ -406,11 +411,11 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << '\n';
     return Bytes;
   }
-      
+
   case Matcher::CheckFoldableChainNode:
     OS << "OPC_CheckFoldableChainNode,\n";
     return 1;
-      
+
   case Matcher::EmitInteger: {
     int64_t Val = cast<EmitIntegerMatcher>(N)->getValue();
     OS << "OPC_EmitInteger, "
@@ -427,35 +432,45 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
       << Val << ",\n";
     return 3;
   }
-      
-  case Matcher::EmitRegister:
-    OS << "OPC_EmitRegister, "
-       << getEnumName(cast<EmitRegisterMatcher>(N)->getVT()) << ", ";
-    if (Record *R = cast<EmitRegisterMatcher>(N)->getReg())
-      OS << getQualifiedName(R) << ",\n";
-    else {
-      OS << "0 ";
-      if (!OmitComments)
-        OS << "/*zero_reg*/";
-      OS << ",\n";
+
+  case Matcher::EmitRegister: {
+    const EmitRegisterMatcher *Matcher = cast<EmitRegisterMatcher>(N);
+    const CodeGenRegister *Reg = Matcher->getReg();
+    // If the enum value of the register is larger than one byte can handle,
+    // use EmitRegister2.
+    if (Reg && Reg->EnumValue > 255) {
+      OS << "OPC_EmitRegister2, " << getEnumName(Matcher->getVT()) << ", ";
+      OS << "TARGET_VAL(" << getQualifiedName(Reg->TheDef) << "),\n";
+      return 4;
+    } else {
+      OS << "OPC_EmitRegister, " << getEnumName(Matcher->getVT()) << ", ";
+      if (Reg) {
+        OS << getQualifiedName(Reg->TheDef) << ",\n";
+      } else {
+        OS << "0 ";
+        if (!OmitComments)
+          OS << "/*zero_reg*/";
+        OS << ",\n";
+      }
+      return 3;
     }
-    return 3;
-      
+  }
+
   case Matcher::EmitConvertToTarget:
     OS << "OPC_EmitConvertToTarget, "
        << cast<EmitConvertToTargetMatcher>(N)->getSlot() << ",\n";
     return 2;
-      
+
   case Matcher::EmitMergeInputChains: {
     const EmitMergeInputChainsMatcher *MN =
       cast<EmitMergeInputChainsMatcher>(N);
-    
+
     // Handle the specialized forms OPC_EmitMergeInputChains1_0 and 1_1.
     if (MN->getNumNodes() == 1 && MN->getNode(0) < 2) {
       OS << "OPC_EmitMergeInputChains1_" << MN->getNode(0) << ",\n";
       return 1;
     }
-    
+
     OS << "OPC_EmitMergeInputChains, " << MN->getNumNodes() << ", ";
     for (unsigned i = 0, e = MN->getNumNodes(); i != e; ++i)
       OS << MN->getNode(i) << ", ";
@@ -477,13 +492,13 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS <<'\n';
     return 3;
   }
-      
+
   case Matcher::EmitNode:
   case Matcher::MorphNodeTo: {
     const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N);
     OS << (isa<EmitNodeMatcher>(EN) ? "OPC_EmitNode" : "OPC_MorphNodeTo");
-    OS << ", TARGET_OPCODE(" << EN->getOpcodeName() << "), 0";
-    
+    OS << ", TARGET_VAL(" << EN->getOpcodeName() << "), 0";
+
     if (EN->hasChain())   OS << "|OPFL_Chain";
     if (EN->hasInFlag())  OS << "|OPFL_GlueInput";
     if (EN->hasOutFlag()) OS << "|OPFL_GlueOutput";
@@ -491,7 +506,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     if (EN->getNumFixedArityOperands() != -1)
       OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
     OS << ",\n";
-    
+
     OS.PadToColumn(Indent*2+4) << EN->getNumVTs();
     if (!OmitComments)
       OS << "/*#VTs*/";
@@ -506,7 +521,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     unsigned NumOperandBytes = 0;
     for (unsigned i = 0, e = EN->getNumOperands(); i != e; ++i)
       NumOperandBytes += EmitVBRValue(EN->getOperand(i), OS);
-    
+
     if (!OmitComments) {
       // Print the result #'s for EmitNode.
       if (const EmitNodeMatcher *E = dyn_cast<EmitNodeMatcher>(EN)) {
@@ -521,14 +536,14 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
 
       if (const MorphNodeToMatcher *SNT = dyn_cast<MorphNodeToMatcher>(N)) {
         OS.PadToColumn(Indent*2) << "// Src: "
-          << *SNT->getPattern().getSrcPattern() << " - Complexity = " 
+          << *SNT->getPattern().getSrcPattern() << " - Complexity = "
           << SNT->getPattern().getPatternComplexity(CGP) << '\n';
         OS.PadToColumn(Indent*2) << "// Dst: "
           << *SNT->getPattern().getDstPattern() << '\n';
       }
     } else
       OS << '\n';
-    
+
     return 6+EN->getNumVTs()+NumOperandBytes;
   }
   case Matcher::MarkGlueResults: {
@@ -549,7 +564,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << '\n';
     if (!OmitComments) {
       OS.PadToColumn(Indent*2) << "// Src: "
-        << *CM->getPattern().getSrcPattern() << " - Complexity = " 
+        << *CM->getPattern().getSrcPattern() << " - Complexity = "
         << CM->getPattern().getPatternComplexity(CGP) << '\n';
       OS.PadToColumn(Indent*2) << "// Dst: "
         << *CM->getPattern().getDstPattern();
@@ -573,7 +588,7 @@ EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     unsigned MatcherSize = EmitMatcher(N, Indent, CurrentIdx, OS);
     Size += MatcherSize;
     CurrentIdx += MatcherSize;
-    
+
     // If there are other nodes in this list, iterate to them, otherwise we're
     // done.
     N = N->getNext();
@@ -592,44 +607,32 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
     OS << "  }\n";
     OS << "}\n\n";
   }
-   
+
   // Emit Node predicates.
   // FIXME: Annoyingly, these are stored by name, which we never even emit. Yay?
   StringMap<TreePattern*> PFsByName;
-  
+
   for (CodeGenDAGPatterns::pf_iterator I = CGP.pf_begin(), E = CGP.pf_end();
        I != E; ++I)
     PFsByName[I->first->getName()] = I->second;
-  
+
   if (!NodePredicates.empty()) {
     OS << "bool CheckNodePredicate(SDNode *Node, unsigned PredNo) const {\n";
     OS << "  switch (PredNo) {\n";
     OS << "  default: assert(0 && \"Invalid predicate in table?\");\n";
     for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) {
-      // FIXME: Storing this by name is horrible.
-      TreePattern *P =PFsByName[NodePredicates[i].substr(strlen("Predicate_"))];
-      assert(P && "Unknown name?");
-      
       // Emit the predicate code corresponding to this pattern.
-      std::string Code = P->getRecord()->getValueAsCode("Predicate");
-      assert(!Code.empty() && "No code in this predicate");
-      OS << "  case " << i << ": { // " << NodePredicates[i] << '\n';
-      std::string ClassName;
-      if (P->getOnlyTree()->isLeaf())
-        ClassName = "SDNode";
-      else
-        ClassName =
-          CGP.getSDNodeInfo(P->getOnlyTree()->getOperator()).getSDClassName();
-      if (ClassName == "SDNode")
-        OS << "    SDNode *N = Node;\n";
-      else
-        OS << "    " << ClassName << "*N = cast<" << ClassName << ">(Node);\n";
-      OS << Code << "\n  }\n";
+      TreePredicateFn PredFn = NodePredicates[i];
+      
+      assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
+      OS << "  case " << i << ": { // " << NodePredicates[i].getFnName() <<'\n';
+      
+      OS << PredFn.getCodeToRunOnSDNode() << "\n  }\n";
     }
     OS << "  }\n";
     OS << "}\n\n";
   }
-  
+
   // Emit CompletePattern matchers.
   // FIXME: This should be const.
   if (!ComplexPatterns.empty()) {
@@ -645,7 +648,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
 
       if (P.hasProperty(SDNPHasChain))
         ++NumOps;  // Get the chained node too.
-      
+
       OS << "  case " << i << ":\n";
       OS << "    Result.resize(NextRes+" << NumOps << ");\n";
       OS << "    return "  << P.getSelectFunc();
@@ -655,12 +658,12 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
       // first argument.
       if (P.hasProperty(SDNPWantRoot))
         OS << "Root, ";
-      
+
       // If the complex pattern wants the parent of the operand being matched,
       // pass it in as the next argument.
       if (P.hasProperty(SDNPWantParent))
         OS << "Parent, ";
-      
+
       OS << "N";
       for (unsigned i = 0; i != NumOps; ++i)
         OS << ", Result[NextRes+" << i << "].first";
@@ -669,28 +672,28 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
     OS << "  }\n";
     OS << "}\n\n";
   }
-  
-  
+
+
   // Emit SDNodeXForm handlers.
   // FIXME: This should be const.
   if (!NodeXForms.empty()) {
     OS << "SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {\n";
     OS << "  switch (XFormNo) {\n";
     OS << "  default: assert(0 && \"Invalid xform # in table?\");\n";
-    
+
     // FIXME: The node xform could take SDValue's instead of SDNode*'s.
     for (unsigned i = 0, e = NodeXForms.size(); i != e; ++i) {
       const CodeGenDAGPatterns::NodeXForm &Entry =
         CGP.getSDNodeTransform(NodeXForms[i]);
-      
+
       Record *SDNode = Entry.first;
       const std::string &Code = Entry.second;
-      
+
       OS << "  case " << i << ": {  ";
       if (!OmitComments)
         OS << "// " << NodeXForms[i]->getName();
       OS << '\n';
-      
+
       std::string ClassName = CGP.getSDNodeInfo(SDNode).getSDClassName();
       if (ClassName == "SDNode")
         OS << "    SDNode *N = V.getNode();\n";
@@ -710,12 +713,12 @@ static void BuildHistogram(const Matcher *M, std::vector<unsigned> &OpcodeFreq){
     if (unsigned(M->getKind()) >= OpcodeFreq.size())
       OpcodeFreq.resize(M->getKind()+1);
     OpcodeFreq[M->getKind()]++;
-  
+
     // Handle recursive nodes.
     if (const ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M)) {
       for (unsigned i = 0, e = SM->getNumChildren(); i != e; ++i)
         BuildHistogram(SM->getChild(i), OpcodeFreq);
-    } else if (const SwitchOpcodeMatcher *SOM = 
+    } else if (const SwitchOpcodeMatcher *SOM =
                  dyn_cast<SwitchOpcodeMatcher>(M)) {
       for (unsigned i = 0, e = SOM->getNumCases(); i != e; ++i)
         BuildHistogram(SOM->getCaseMatcher(i), OpcodeFreq);
@@ -730,16 +733,16 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
                                         formatted_raw_ostream &OS) {
   if (OmitComments)
     return;
-  
+
   std::vector<unsigned> OpcodeFreq;
   BuildHistogram(M, OpcodeFreq);
-  
+
   OS << "  // Opcode Histogram:\n";
   for (unsigned i = 0, e = OpcodeFreq.size(); i != e; ++i) {
     OS << "  // #";
     switch ((Matcher::KindTy)i) {
-    case Matcher::Scope: OS << "OPC_Scope"; break; 
-    case Matcher::RecordNode: OS << "OPC_RecordNode"; break; 
+    case Matcher::Scope: OS << "OPC_Scope"; break;
+    case Matcher::RecordNode: OS << "OPC_RecordNode"; break;
     case Matcher::RecordChild: OS << "OPC_RecordChild"; break;
     case Matcher::RecordMemRef: OS << "OPC_RecordMemRef"; break;
     case Matcher::CaptureGlueInput: OS << "OPC_CaptureGlueInput"; break;
@@ -772,9 +775,9 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
     case Matcher::MorphNodeTo: OS << "OPC_MorphNodeTo"; break;
     case Matcher::EmitNodeXForm: OS << "OPC_EmitNodeXForm"; break;
     case Matcher::MarkGlueResults: OS << "OPC_MarkGlueResults"; break;
-    case Matcher::CompleteMatch: OS << "OPC_CompleteMatch"; break;    
+    case Matcher::CompleteMatch: OS << "OPC_CompleteMatch"; break;
     }
-    
+
     OS.PadToColumn(40) << " = " << OpcodeFreq[i] << '\n';
   }
   OS << '\n';
@@ -782,26 +785,28 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
 
 
 void llvm::EmitMatcherTable(const Matcher *TheMatcher,
-                            const CodeGenDAGPatterns &CGP, raw_ostream &O) {
+                            const CodeGenDAGPatterns &CGP,
+                            raw_ostream &O) {
   formatted_raw_ostream OS(O);
-  
+
   OS << "// The main instruction selector code.\n";
   OS << "SDNode *SelectCode(SDNode *N) {\n";
 
   MatcherTableEmitter MatcherEmitter(CGP);
 
-  OS << "  // Opcodes are emitted as 2 bytes, TARGET_OPCODE handles this.\n";
-  OS << "  #define TARGET_OPCODE(X) X & 255, unsigned(X) >> 8\n";
+  OS << "  // Some target values are emitted as 2 bytes, TARGET_VAL handles\n";
+  OS << "  // this.\n";
+  OS << "  #define TARGET_VAL(X) X & 255, unsigned(X) >> 8\n";
   OS << "  static const unsigned char MatcherTable[] = {\n";
   unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 5, 0, OS);
   OS << "    0\n  }; // Total Array size is " << (TotalSize+1) << " bytes\n\n";
-  
+
   MatcherEmitter.EmitHistogram(TheMatcher, OS);
-  
-  OS << "  #undef TARGET_OPCODE\n";
+
+  OS << "  #undef TARGET_VAL\n";
   OS << "  return SelectCodeCommon(N, MatcherTable,sizeof(MatcherTable));\n}\n";
   OS << '\n';
-  
+
   // Next up, emit the function for node and pattern predicates:
   MatcherEmitter.EmitPredicateFunctions(OS);
 }
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 7c0bade..393ac69 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -9,7 +9,9 @@
 
 #include "DAGISelMatcher.h"
 #include "CodeGenDAGPatterns.h"
+#include "CodeGenRegisters.h"
 #include "Record.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include <utility>
@@ -91,6 +93,10 @@ namespace {
     /// CurPredicate - As we emit matcher nodes, this points to the latest check
     /// which should have future checks stuck into its Next position.
     Matcher *CurPredicate;
+
+    /// RegisterDefMap - A map of register record definitions to the
+    /// corresponding target CodeGenRegister entry.
+    DenseMap<const Record *, const CodeGenRegister *> RegisterDefMap;
   public:
     MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp);
 
@@ -159,6 +165,12 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern,
 
   // If there are types that are manifestly known, infer them.
   InferPossibleTypes();
+
+  // Populate the map from records to CodeGenRegister entries.
+  const CodeGenTarget &CGT = CGP.getTargetInfo();
+  const std::vector<CodeGenRegister> &Registers = CGT.getRegisters();
+  for (unsigned i = 0, e = Registers.size(); i != e; ++i)
+    RegisterDefMap[Registers[i].TheDef] = &Registers[i];
 }
 
 /// InferPossibleTypes - As we emit the pattern, we end up generating type
@@ -578,7 +590,8 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
   // If this is an explicit register reference, handle it.
   if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
     if (DI->getDef()->isSubClassOf("Register")) {
-      AddMatcher(new EmitRegisterMatcher(DI->getDef(), N->getType(0)));
+      AddMatcher(new EmitRegisterMatcher(RegisterDefMap[DI->getDef()],
+                                         N->getType(0)));
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index 3169ea1..f996422 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -18,7 +18,6 @@
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <vector>
 using namespace llvm;
 
 /// ContractNodes - Turn multiple matcher node patterns like 'MoveChild+Record'
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 90a2af2..d68d3b0 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -40,12 +40,12 @@ using namespace llvm::X86Disassembler;
 ///   all cases as a 64-bit instruction with only OPSIZE set.  (The XS prefix
 ///   may have effects on its execution, but does not change the instruction
 ///   returned.)  This allows considerable space savings in other tables.
-/// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and
-///   THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while
-///   decoding an instruction.  At the lowest level of this hierarchy are
-///   instruction UIDs, 16-bit integers that can be used to uniquely identify
-///   the instruction and correspond exactly to its position in the list of
-///   CodeGenInstructions for the target.
+/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
+///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM contain the hierarchy that the
+///   decoder traverses while decoding an instruction.  At the lowest level of
+///   this hierarchy are instruction UIDs, 16-bit integers that can be used to
+///   uniquely identify the instruction and correspond exactly to its position
+///   in the list of CodeGenInstructions for the target.
 /// - One table (INSTRUCTIONS_SYM) contains information about the operands of
 ///   each instruction and how to decode them.
 ///
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 020a4a3..5358c0c 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <map>
 #include <string>
 #include <vector>
 
@@ -598,6 +597,11 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   IMM("t2adrlabel");
   IMM("shift_imm");
   IMM("neon_vcvt_imm32");
+  IMM("shr_imm8");
+  IMM("shr_imm16");
+  IMM("shr_imm32");
+  IMM("shr_imm64");
+  IMM("t2ldrlabel");
 
   MISC("brtarget", "kOperandTypeARMBranchTarget");                // ?
   MISC("uncondbrtarget", "kOperandTypeARMBranchTarget");           // ?
@@ -632,10 +636,12 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   MISC("am6offset", "kOperandTypeARMAddrMode6Offset");            // R, I, I
   MISC("addrmode6dup", "kOperandTypeARMAddrMode6");               // R, R, I, I
   MISC("addrmodepc", "kOperandTypeARMAddrModePC");                // R, I
+  MISC("addrmode7", "kOperandTypeARMAddrMode7");                  // R
   MISC("reglist", "kOperandTypeARMRegisterList");                 // I, R, ...
   MISC("dpr_reglist", "kOperandTypeARMDPRRegisterList");          // I, R, ...
   MISC("spr_reglist", "kOperandTypeARMSPRRegisterList");          // I, R, ...
   MISC("it_mask", "kOperandTypeThumbITMask");                     // I
+  MISC("t2addrmode_reg", "kOperandTypeThumb2AddrModeReg");        // R
   MISC("t2addrmode_imm8", "kOperandTypeThumb2AddrModeImm8");      // R, I
   MISC("t2am_imm8_offset", "kOperandTypeThumb2AddrModeImm8Offset");//I
   MISC("t2addrmode_imm12", "kOperandTypeThumb2AddrModeImm12");    // R, I
@@ -853,6 +859,7 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
   operandTypes.addEntry("kOperandTypeARMAddrMode5");
   operandTypes.addEntry("kOperandTypeARMAddrMode6");
   operandTypes.addEntry("kOperandTypeARMAddrMode6Offset");
+  operandTypes.addEntry("kOperandTypeARMAddrMode7");
   operandTypes.addEntry("kOperandTypeARMAddrModePC");
   operandTypes.addEntry("kOperandTypeARMRegisterList");
   operandTypes.addEntry("kOperandTypeARMDPRRegisterList");
@@ -864,6 +871,7 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
   operandTypes.addEntry("kOperandTypeThumbAddrModeRR");
   operandTypes.addEntry("kOperandTypeThumbAddrModeSP");
   operandTypes.addEntry("kOperandTypeThumbAddrModePC");
+  operandTypes.addEntry("kOperandTypeThumb2AddrModeReg");
   operandTypes.addEntry("kOperandTypeThumb2SoReg");
   operandTypes.addEntry("kOperandTypeThumb2SoImm");
   operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8");
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index f01de1d..9c11bf6 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -35,36 +35,150 @@ struct InstructionMemo {
   std::string SubRegNo;
   std::vector<std::string>* PhysRegs;
 };
+  
+/// ImmPredicateSet - This uniques predicates (represented as a string) and
+/// gives them unique (small) integer ID's that start at 0.
+class ImmPredicateSet {
+  DenseMap<TreePattern *, unsigned> ImmIDs;
+  std::vector<TreePredicateFn> PredsByName;
+public:
+  
+  unsigned getIDFor(TreePredicateFn Pred) {
+    unsigned &Entry = ImmIDs[Pred.getOrigPatFragRecord()];
+    if (Entry == 0) {
+      PredsByName.push_back(Pred);
+      Entry = PredsByName.size();
+    }
+    return Entry-1;
+  }
+  
+  const TreePredicateFn &getPredicate(unsigned i) {
+    assert(i < PredsByName.size());
+    return PredsByName[i];
+  }
+  
+  typedef std::vector<TreePredicateFn>::const_iterator iterator;
+  iterator begin() const { return PredsByName.begin(); }
+  iterator end() const { return PredsByName.end(); }
+  
+};
 
 /// OperandsSignature - This class holds a description of a list of operand
 /// types. It has utility methods for emitting text based on the operands.
 ///
 struct OperandsSignature {
-  std::vector<std::string> Operands;
+  class OpKind {
+    enum { OK_Reg, OK_FP, OK_Imm, OK_Invalid = -1 };
+    char Repr;
+  public:
+    
+    OpKind() : Repr(OK_Invalid) {}
+    
+    bool operator<(OpKind RHS) const { return Repr < RHS.Repr; }
+    bool operator==(OpKind RHS) const { return Repr == RHS.Repr; }
+
+    static OpKind getReg() { OpKind K; K.Repr = OK_Reg; return K; }
+    static OpKind getFP()  { OpKind K; K.Repr = OK_FP; return K; }
+    static OpKind getImm(unsigned V) {
+      assert((unsigned)OK_Imm+V < 128 &&
+             "Too many integer predicates for the 'Repr' char");
+      OpKind K; K.Repr = OK_Imm+V; return K;
+    }
+    
+    bool isReg() const { return Repr == OK_Reg; }
+    bool isFP() const  { return Repr == OK_FP; }
+    bool isImm() const { return Repr >= OK_Imm; }
+    
+    unsigned getImmCode() const { assert(isImm()); return Repr-OK_Imm; }
+    
+    void printManglingSuffix(raw_ostream &OS, ImmPredicateSet &ImmPredicates,
+                             bool StripImmCodes) const {
+      if (isReg())
+        OS << 'r';
+      else if (isFP())
+        OS << 'f';
+      else {
+        OS << 'i';
+        if (!StripImmCodes)
+          if (unsigned Code = getImmCode())
+            OS << "_" << ImmPredicates.getPredicate(Code-1).getFnName();
+      }
+    }
+  };
+  
+  
+  SmallVector<OpKind, 3> Operands;
 
   bool operator<(const OperandsSignature &O) const {
     return Operands < O.Operands;
   }
+  bool operator==(const OperandsSignature &O) const {
+    return Operands == O.Operands;
+  }
 
   bool empty() const { return Operands.empty(); }
 
+  bool hasAnyImmediateCodes() const {
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (Operands[i].isImm() && Operands[i].getImmCode() != 0)
+        return true;
+    return false;
+  }
+  
+  /// getWithoutImmCodes - Return a copy of this with any immediate codes forced
+  /// to zero.
+  OperandsSignature getWithoutImmCodes() const {
+    OperandsSignature Result;
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (!Operands[i].isImm())
+        Result.Operands.push_back(Operands[i]);
+      else
+        Result.Operands.push_back(OpKind::getImm(0));
+    return Result;
+  }
+  
+  void emitImmediatePredicate(raw_ostream &OS, ImmPredicateSet &ImmPredicates) {
+    bool EmittedAnything = false;
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+      if (!Operands[i].isImm()) continue;
+      
+      unsigned Code = Operands[i].getImmCode();
+      if (Code == 0) continue;
+      
+      if (EmittedAnything)
+        OS << " &&\n        ";
+      
+      TreePredicateFn PredFn = ImmPredicates.getPredicate(Code-1);
+      
+      // Emit the type check.
+      OS << "VT == "
+         << getEnumName(PredFn.getOrigPatFragRecord()->getTree(0)->getType(0))
+         << " && ";
+      
+      
+      OS << PredFn.getFnName() << "(imm" << i <<')';
+      EmittedAnything = true;
+    }
+  }
+  
   /// initialize - Examine the given pattern and initialize the contents
   /// of the Operands array accordingly. Return true if all the operands
   /// are supported, false otherwise.
   ///
-  bool initialize(TreePatternNode *InstPatNode,
-                  const CodeGenTarget &Target,
-                  MVT::SimpleValueType VT) {
-
-    if (!InstPatNode->isLeaf()) {
-      if (InstPatNode->getOperator()->getName() == "imm") {
-        Operands.push_back("i");
-        return true;
-      }
-      if (InstPatNode->getOperator()->getName() == "fpimm") {
-        Operands.push_back("f");
-        return true;
-      }
+  bool initialize(TreePatternNode *InstPatNode, const CodeGenTarget &Target,
+                  MVT::SimpleValueType VT,
+                  ImmPredicateSet &ImmediatePredicates) {
+    if (InstPatNode->isLeaf())
+      return false;
+    
+    if (InstPatNode->getOperator()->getName() == "imm") {
+      Operands.push_back(OpKind::getImm(0));
+      return true;
+    }
+    
+    if (InstPatNode->getOperator()->getName() == "fpimm") {
+      Operands.push_back(OpKind::getFP());
+      return true;
     }
 
     const CodeGenRegisterClass *DstRC = 0;
@@ -72,35 +186,65 @@ struct OperandsSignature {
     for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) {
       TreePatternNode *Op = InstPatNode->getChild(i);
 
+      // Handle imm operands specially.
+      if (!Op->isLeaf() && Op->getOperator()->getName() == "imm") {
+        unsigned PredNo = 0;
+        if (!Op->getPredicateFns().empty()) {
+          TreePredicateFn PredFn = Op->getPredicateFns()[0];
+          // If there is more than one predicate weighing in on this operand
+          // then we don't handle it.  This doesn't typically happen for
+          // immediates anyway.
+          if (Op->getPredicateFns().size() > 1 ||
+              !PredFn.isImmediatePattern())
+            return false;
+          // Ignore any instruction with 'FastIselShouldIgnore', these are
+          // not needed and just bloat the fast instruction selector.  For
+          // example, X86 doesn't need to generate code to match ADD16ri8 since
+          // ADD16ri will do just fine.
+          Record *Rec = PredFn.getOrigPatFragRecord()->getRecord();
+          if (Rec->getValueAsBit("FastIselShouldIgnore"))
+            return false;
+        
+          PredNo = ImmediatePredicates.getIDFor(PredFn)+1;
+        }
+        
+        // Handle unmatched immediate sizes here.
+        //if (Op->getType(0) != VT)
+        //  return false;
+        
+        Operands.push_back(OpKind::getImm(PredNo));
+        continue;
+      }
+
+      
       // For now, filter out any operand with a predicate.
       // For now, filter out any operand with multiple values.
-      if (!Op->getPredicateFns().empty() ||
-          Op->getNumTypes() != 1)
-        return false;
-
-      assert(Op->hasTypeSet(0) && "Type infererence not done?");
-      // For now, all the operands must have the same type.
-      if (Op->getType(0) != VT)
+      if (!Op->getPredicateFns().empty() || Op->getNumTypes() != 1)
         return false;
 
       if (!Op->isLeaf()) {
-        if (Op->getOperator()->getName() == "imm") {
-          Operands.push_back("i");
-          continue;
-        }
-        if (Op->getOperator()->getName() == "fpimm") {
-          Operands.push_back("f");
+         if (Op->getOperator()->getName() == "fpimm") {
+          Operands.push_back(OpKind::getFP());
           continue;
         }
         // For now, ignore other non-leaf nodes.
         return false;
       }
+      
+      assert(Op->hasTypeSet(0) && "Type infererence not done?");
+
+      // For now, all the operands must have the same type (if they aren't
+      // immediates).  Note that this causes us to reject variable sized shifts
+      // on X86.
+      if (Op->getType(0) != VT)
+        return false;
+
       DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
       if (!OpDI)
         return false;
       Record *OpLeafRec = OpDI->getDef();
+      
       // For now, the only other thing we accept is register operands.
-
       const CodeGenRegisterClass *RC = 0;
       if (OpLeafRec->isSubClassOf("RegisterClass"))
         RC = &Target.getRegisterClass(OpLeafRec);
@@ -120,18 +264,18 @@ struct OperandsSignature {
           return false;
       } else
         DstRC = RC;
-      Operands.push_back("r");
+      Operands.push_back(OpKind::getReg());
     }
     return true;
   }
 
   void PrintParameters(raw_ostream &OS) const {
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-      if (Operands[i] == "r") {
+      if (Operands[i].isReg()) {
         OS << "unsigned Op" << i << ", bool Op" << i << "IsKill";
-      } else if (Operands[i] == "i") {
+      } else if (Operands[i].isImm()) {
         OS << "uint64_t imm" << i;
-      } else if (Operands[i] == "f") {
+      } else if (Operands[i].isFP()) {
         OS << "ConstantFP *f" << i;
       } else {
         assert("Unknown operand kind!");
@@ -143,7 +287,7 @@ struct OperandsSignature {
   }
 
   void PrintArguments(raw_ostream &OS,
-                      const std::vector<std::string>& PR) const {
+                      const std::vector<std::string> &PR) const {
     assert(PR.size() == Operands.size());
     bool PrintedArg = false;
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
@@ -153,13 +297,13 @@ struct OperandsSignature {
 
       if (PrintedArg)
         OS << ", ";
-      if (Operands[i] == "r") {
+      if (Operands[i].isReg()) {
         OS << "Op" << i << ", Op" << i << "IsKill";
         PrintedArg = true;
-      } else if (Operands[i] == "i") {
+      } else if (Operands[i].isImm()) {
         OS << "imm" << i;
         PrintedArg = true;
-      } else if (Operands[i] == "f") {
+      } else if (Operands[i].isFP()) {
         OS << "f" << i;
         PrintedArg = true;
       } else {
@@ -171,11 +315,11 @@ struct OperandsSignature {
 
   void PrintArguments(raw_ostream &OS) const {
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-      if (Operands[i] == "r") {
+      if (Operands[i].isReg()) {
         OS << "Op" << i << ", Op" << i << "IsKill";
-      } else if (Operands[i] == "i") {
+      } else if (Operands[i].isImm()) {
         OS << "imm" << i;
-      } else if (Operands[i] == "f") {
+      } else if (Operands[i].isFP()) {
         OS << "f" << i;
       } else {
         assert("Unknown operand kind!");
@@ -187,8 +331,9 @@ struct OperandsSignature {
   }
 
 
-  void PrintManglingSuffix(raw_ostream &OS,
-                           const std::vector<std::string>& PR) const {
+  void PrintManglingSuffix(raw_ostream &OS, const std::vector<std::string> &PR,
+                           ImmPredicateSet &ImmPredicates,
+                           bool StripImmCodes = false) const {
     for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
       if (PR[i] != "")
         // Implicit physical register operand. e.g. Instruction::Mul expect to
@@ -197,14 +342,14 @@ struct OperandsSignature {
         // like a binary instruction except for the very inner FastEmitInst_*
         // call.
         continue;
-      OS << Operands[i];
+      Operands[i].printManglingSuffix(OS, ImmPredicates, StripImmCodes);
     }
   }
 
-  void PrintManglingSuffix(raw_ostream &OS) const {
-    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-      OS << Operands[i];
-    }
+  void PrintManglingSuffix(raw_ostream &OS, ImmPredicateSet &ImmPredicates,
+                           bool StripImmCodes = false) const {
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      Operands[i].printManglingSuffix(OS, ImmPredicates, StripImmCodes);
   }
 };
 
@@ -218,13 +363,17 @@ class FastISelMap {
 
   OperandsOpcodeTypeRetPredMap SimplePatterns;
 
+  std::map<OperandsSignature, std::vector<OperandsSignature> >
+    SignaturesWithConstantForms;
+  
   std::string InstNS;
-
+  ImmPredicateSet ImmediatePredicates;
 public:
   explicit FastISelMap(std::string InstNS);
 
-  void CollectPatterns(CodeGenDAGPatterns &CGP);
-  void PrintFunctionDefinitions(raw_ostream &OS);
+  void collectPatterns(CodeGenDAGPatterns &CGP);
+  void printImmediatePredicates(raw_ostream &OS);
+  void printFunctionDefinitions(raw_ostream &OS);
 };
 
 }
@@ -244,7 +393,34 @@ FastISelMap::FastISelMap(std::string instns)
   : InstNS(instns) {
 }
 
-void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
+static std::string PhyRegForNode(TreePatternNode *Op,
+                                 const CodeGenTarget &Target) {
+  std::string PhysReg;
+
+  if (!Op->isLeaf())
+    return PhysReg;
+
+  DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
+  Record *OpLeafRec = OpDI->getDef();
+  if (!OpLeafRec->isSubClassOf("Register"))
+    return PhysReg;
+
+  PhysReg += static_cast<StringInit*>(OpLeafRec->getValue( \
+             "Namespace")->getValue())->getValue();
+  PhysReg += "::";
+
+  std::vector<CodeGenRegister> Regs = Target.getRegisters();
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    if (Regs[i].TheDef == OpLeafRec) {
+      PhysReg += Regs[i].getName();
+      break;
+    }
+  }
+
+  return PhysReg;
+}
+
+void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
   const CodeGenTarget &Target = CGP.getTargetInfo();
 
   // Determine the target's namespace name.
@@ -264,7 +440,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     if (!Op->isSubClassOf("Instruction"))
       continue;
     CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op);
-    if (II.Operands.size() == 0)
+    if (II.Operands.empty())
       continue;
 
     // For now, ignore multi-instruction patterns.
@@ -322,54 +498,45 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
       VT = InstPatNode->getChild(0)->getType(0);
     }
 
-    // For now, filter out instructions which just set a register to
-    // an Operand or an immediate, like MOV32ri.
-    if (InstPatOp->isSubClassOf("Operand"))
-      continue;
-
     // For now, filter out any instructions with predicates.
     if (!InstPatNode->getPredicateFns().empty())
       continue;
 
     // Check all the operands.
     OperandsSignature Operands;
-    if (!Operands.initialize(InstPatNode, Target, VT))
+    if (!Operands.initialize(InstPatNode, Target, VT, ImmediatePredicates))
       continue;
 
     std::vector<std::string>* PhysRegInputs = new std::vector<std::string>();
-    if (!InstPatNode->isLeaf() &&
-        (InstPatNode->getOperator()->getName() == "imm" ||
-         InstPatNode->getOperator()->getName() == "fpimmm"))
+    if (InstPatNode->getOperator()->getName() == "imm" ||
+        InstPatNode->getOperator()->getName() == "fpimmm")
       PhysRegInputs->push_back("");
-    else if (!InstPatNode->isLeaf()) {
+    else {
+      // Compute the PhysRegs used by the given pattern, and check that
+      // the mapping from the src to dst patterns is simple.
+      bool FoundNonSimplePattern = false;
+      unsigned DstIndex = 0;
       for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) {
-        TreePatternNode *Op = InstPatNode->getChild(i);
-        if (!Op->isLeaf()) {
-          PhysRegInputs->push_back("");
-          continue;
-        }
-
-        DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
-        Record *OpLeafRec = OpDI->getDef();
-        std::string PhysReg;
-        if (OpLeafRec->isSubClassOf("Register")) {
-          PhysReg += static_cast<StringInit*>(OpLeafRec->getValue( \
-                     "Namespace")->getValue())->getValue();
-          PhysReg += "::";
-
-          std::vector<CodeGenRegister> Regs = Target.getRegisters();
-          for (unsigned i = 0; i < Regs.size(); ++i) {
-            if (Regs[i].TheDef == OpLeafRec) {
-              PhysReg += Regs[i].getName();
-              break;
-            }
+        std::string PhysReg = PhyRegForNode(InstPatNode->getChild(i), Target);
+        if (PhysReg.empty()) {
+          if (DstIndex >= Dst->getNumChildren() ||
+              Dst->getChild(DstIndex)->getName() !=
+              InstPatNode->getChild(i)->getName()) {
+            FoundNonSimplePattern = true;
+            break;
           }
+          ++DstIndex;
         }
 
         PhysRegInputs->push_back(PhysReg);
       }
-    } else
-      PhysRegInputs->push_back("");
+
+      if (Op->getName() != "EXTRACT_SUBREG" && DstIndex < Dst->getNumChildren())
+        FoundNonSimplePattern = true;
+
+      if (FoundNonSimplePattern)
+        continue;
+    }
 
     // Get the predicate that guards this pattern.
     std::string PredicateCheck = Pattern.getPredicateCheck();
@@ -381,15 +548,39 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
       SubRegNo,
       PhysRegInputs
     };
-    if (SimplePatterns[Operands][OpcodeName][VT][RetVT]
-            .count(PredicateCheck))
-      throw TGError(Pattern.getSrcRecord()->getLoc(), "Duplicate record!");
+    
+    if (SimplePatterns[Operands][OpcodeName][VT][RetVT].count(PredicateCheck))
+      throw TGError(Pattern.getSrcRecord()->getLoc(),
+                    "Duplicate record in FastISel table!");
 
     SimplePatterns[Operands][OpcodeName][VT][RetVT][PredicateCheck] = Memo;
+    
+    // If any of the operands were immediates with predicates on them, strip
+    // them down to a signature that doesn't have predicates so that we can
+    // associate them with the stripped predicate version.
+    if (Operands.hasAnyImmediateCodes()) {
+      SignaturesWithConstantForms[Operands.getWithoutImmCodes()]
+        .push_back(Operands);
+    }
   }
 }
 
-void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
+void FastISelMap::printImmediatePredicates(raw_ostream &OS) {
+  if (ImmediatePredicates.begin() == ImmediatePredicates.end())
+    return;
+  
+  OS << "\n// FastEmit Immediate Predicate functions.\n";
+  for (ImmPredicateSet::iterator I = ImmediatePredicates.begin(),
+       E = ImmediatePredicates.end(); I != E; ++I) {
+    OS << "static bool " << I->getFnName() << "(int64_t Imm) {\n";
+    OS << I->getImmediatePredicateCode() << "\n}\n";
+  }
+  
+  OS << "\n\n";
+}
+
+
+void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
   // Now emit code for all the patterns that we collected.
   for (OperandsOpcodeTypeRetPredMap::const_iterator OI = SimplePatterns.begin(),
        OE = SimplePatterns.end(); OI != OE; ++OI) {
@@ -420,7 +611,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                << getLegalCName(Opcode)
                << "_" << getLegalCName(getName(VT))
                << "_" << getLegalCName(getName(RetVT)) << "_";
-            Operands.PrintManglingSuffix(OS);
+            Operands.PrintManglingSuffix(OS, ImmediatePredicates);
             OS << "(";
             Operands.PrintParameters(OS);
             OS << ") {\n";
@@ -451,7 +642,8 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
 
               OS << "  return FastEmitInst_";
               if (Memo.SubRegNo.empty()) {
-                Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
+                Operands.PrintManglingSuffix(OS, *Memo.PhysRegs,
+                                             ImmediatePredicates, true);
                 OS << "(" << InstNS << Memo.Name << ", ";
                 OS << InstNS << Memo.RC->getName() << "RegisterClass";
                 if (!Operands.empty())
@@ -460,9 +652,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                 OS << ");\n";
               } else {
                 OS << "extractsubreg(" << getName(RetVT);
-                OS << ", Op0, Op0IsKill, ";
-                OS << Memo.SubRegNo;
-                OS << ");\n";
+                OS << ", Op0, Op0IsKill, " << Memo.SubRegNo << ");\n";
               }
 
               if (HasPred)
@@ -480,7 +670,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
           OS << "unsigned FastEmit_"
              << getLegalCName(Opcode) << "_"
              << getLegalCName(getName(VT)) << "_";
-          Operands.PrintManglingSuffix(OS);
+          Operands.PrintManglingSuffix(OS, ImmediatePredicates);
           OS << "(MVT RetVT";
           if (!Operands.empty())
             OS << ", ";
@@ -492,7 +682,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
             OS << "  case " << getName(RetVT) << ": return FastEmit_"
                << getLegalCName(Opcode) << "_" << getLegalCName(getName(VT))
                << "_" << getLegalCName(getName(RetVT)) << "_";
-            Operands.PrintManglingSuffix(OS);
+            Operands.PrintManglingSuffix(OS, ImmediatePredicates);
             OS << "(";
             Operands.PrintArguments(OS);
             OS << ");\n";
@@ -504,7 +694,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
           OS << "unsigned FastEmit_"
              << getLegalCName(Opcode) << "_"
              << getLegalCName(getName(VT)) << "_";
-          Operands.PrintManglingSuffix(OS);
+          Operands.PrintManglingSuffix(OS, ImmediatePredicates);
           OS << "(MVT RetVT";
           if (!Operands.empty())
             OS << ", ";
@@ -544,7 +734,8 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
             OS << "  return FastEmitInst_";
 
             if (Memo.SubRegNo.empty()) {
-              Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
+              Operands.PrintManglingSuffix(OS, *Memo.PhysRegs,
+                                           ImmediatePredicates, true);
               OS << "(" << InstNS << Memo.Name << ", ";
               OS << InstNS << Memo.RC->getName() << "RegisterClass";
               if (!Operands.empty())
@@ -572,7 +763,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
       // Emit one function for the opcode that demultiplexes based on the type.
       OS << "unsigned FastEmit_"
          << getLegalCName(Opcode) << "_";
-      Operands.PrintManglingSuffix(OS);
+      Operands.PrintManglingSuffix(OS, ImmediatePredicates);
       OS << "(MVT VT, MVT RetVT";
       if (!Operands.empty())
         OS << ", ";
@@ -585,7 +776,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
         std::string TypeName = getName(VT);
         OS << "  case " << TypeName << ": return FastEmit_"
            << getLegalCName(Opcode) << "_" << getLegalCName(TypeName) << "_";
-        Operands.PrintManglingSuffix(OS);
+        Operands.PrintManglingSuffix(OS, ImmediatePredicates);
         OS << "(RetVT";
         if (!Operands.empty())
           OS << ", ";
@@ -604,12 +795,44 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
     // Emit one function for the operand signature that demultiplexes based
     // on opcode and type.
     OS << "unsigned FastEmit_";
-    Operands.PrintManglingSuffix(OS);
+    Operands.PrintManglingSuffix(OS, ImmediatePredicates);
     OS << "(MVT VT, MVT RetVT, unsigned Opcode";
     if (!Operands.empty())
       OS << ", ";
     Operands.PrintParameters(OS);
     OS << ") {\n";
+    
+    // If there are any forms of this signature available that operand on
+    // constrained forms of the immediate (e.g. 32-bit sext immediate in a
+    // 64-bit operand), check them first.
+    
+    std::map<OperandsSignature, std::vector<OperandsSignature> >::iterator MI
+      = SignaturesWithConstantForms.find(Operands);
+    if (MI != SignaturesWithConstantForms.end()) {
+      // Unique any duplicates out of the list.
+      std::sort(MI->second.begin(), MI->second.end());
+      MI->second.erase(std::unique(MI->second.begin(), MI->second.end()),
+                       MI->second.end());
+      
+      // Check each in order it was seen.  It would be nice to have a good
+      // relative ordering between them, but we're not going for optimality
+      // here.
+      for (unsigned i = 0, e = MI->second.size(); i != e; ++i) {
+        OS << "  if (";
+        MI->second[i].emitImmediatePredicate(OS, ImmediatePredicates);
+        OS << ")\n    if (unsigned Reg = FastEmit_";
+        MI->second[i].PrintManglingSuffix(OS, ImmediatePredicates);
+        OS << "(VT, RetVT, Opcode";
+        if (!MI->second[i].empty())
+          OS << ", ";
+        MI->second[i].PrintArguments(OS);
+        OS << "))\n      return Reg;\n\n";
+      }
+      
+      // Done with this, remove it.
+      SignaturesWithConstantForms.erase(MI);
+    }
+    
     OS << "  switch (Opcode) {\n";
     for (OpcodeTypeRetPredMap::const_iterator I = OTM.begin(), E = OTM.end();
          I != E; ++I) {
@@ -617,7 +840,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
 
       OS << "  case " << Opcode << ": return FastEmit_"
          << getLegalCName(Opcode) << "_";
-      Operands.PrintManglingSuffix(OS);
+      Operands.PrintManglingSuffix(OS, ImmediatePredicates);
       OS << "(VT, RetVT";
       if (!Operands.empty())
         OS << ", ";
@@ -629,6 +852,8 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
     OS << "}\n";
     OS << "\n";
   }
+  
+  // TODO: SignaturesWithConstantForms should be empty here.
 }
 
 void FastISelEmitter::run(raw_ostream &OS) {
@@ -642,12 +867,12 @@ void FastISelEmitter::run(raw_ostream &OS) {
                        Target.getName() + " target", OS);
 
   FastISelMap F(InstNS);
-  F.CollectPatterns(CGP);
-  F.PrintFunctionDefinitions(OS);
+  F.collectPatterns(CGP);
+  F.printImmediatePredicates(OS);
+  F.printFunctionDefinitions(OS);
 }
 
 FastISelEmitter::FastISelEmitter(RecordKeeper &R)
-  : Records(R),
-    CGP(R) {
+  : Records(R), CGP(R) {
 }
 
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 2c222b3..9312fe8 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -438,7 +438,7 @@ void Filter::recurse() {
     for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
       BitValueArray[StartBit + bitIndex] = BIT_UNSET;
 
-    // Delegates to an inferior filter chooser for futher processing on this
+    // Delegates to an inferior filter chooser for further processing on this
     // group of instructions whose segment values are variable.
     FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
                               (unsigned)-1,
@@ -471,7 +471,7 @@ void Filter::recurse() {
         BitValueArray[StartBit + bitIndex] = BIT_FALSE;
     }
 
-    // Delegates to an inferior filter chooser for futher processing on this
+    // Delegates to an inferior filter chooser for further processing on this
     // category of instructions.
     FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
                               mapIterator->first,
@@ -611,7 +611,8 @@ void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation) {
   o << '\n';
 
   o.indent(Indentation) <<
-    "static bool decodeInstruction(MCInst &MI, field_t insn) {\n";
+    "static bool decodeInstruction(MCInst &MI, field_t insn, "
+    "uint64_t Address, const void *Decoder) {\n";
   o.indent(Indentation) << "  unsigned tmp = 0;\n";
 
   ++Indentation; ++Indentation;
@@ -795,7 +796,8 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
          I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
       // If a custom instruction decoder was specified, use that.
       if (I->FieldBase == ~0U && I->FieldLength == ~0U) {
-        o.indent(Indentation) << "  " << I->Decoder << "(MI, insn);\n";
+        o.indent(Indentation) << "  " << I->Decoder
+                              << "(MI, insn, Address, Decoder);\n";
         break;
       }
 
@@ -803,7 +805,8 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
         << "  tmp = fieldFromInstruction(insn, " << I->FieldBase
         << ", " << I->FieldLength << ");\n";
       if (I->Decoder != "") {
-        o.indent(Indentation) << "  " << I->Decoder << "(MI, tmp);\n";
+        o.indent(Indentation) << "  " << I->Decoder
+                              << "(MI, tmp, Address, Decoder);\n";
       } else {
         o.indent(Indentation)
           << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
@@ -846,7 +849,8 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
        I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
     // If a custom instruction decoder was specified, use that.
     if (I->FieldBase == ~0U && I->FieldLength == ~0U) {
-      o.indent(Indentation) << "  " << I->Decoder << "(MI, insn);\n";
+      o.indent(Indentation) << "  " << I->Decoder
+                            << "(MI, insn, Address, Decoder);\n";
       break;
     }
 
@@ -854,7 +858,8 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
       << "  tmp = fieldFromInstruction(insn, " << I->FieldBase
       << ", " << I->FieldLength << ");\n";
     if (I->Decoder != "") {
-      o.indent(Indentation) << "  " << I->Decoder << "(MI, tmp);\n";
+      o.indent(Indentation) << "  " << I->Decoder
+                            << "(MI, tmp, Address, Decoder);\n";
     } else {
       o.indent(Indentation)
         << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
@@ -1224,7 +1229,8 @@ bool FixedLenDecoderEmitter::populateInstruction(const CodeGenInstruction &CGI,
   if (Bits.allInComplete()) return false;
 
   // Ignore "asm parser only" instructions.
-  if (Def.getValueAsBit("isAsmParserOnly"))
+  if (Def.getValueAsBit("isAsmParserOnly") ||
+      Def.getValueAsBit("isCodeGenOnly"))
     return false;
 
   std::vector<OperandInfo> InsnOperands;
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 2b684be..67cce0e 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -272,6 +272,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isIndirectBranch)   OS << "|(1<<TID::IndirectBranch)";
   if (Inst.isCompare)          OS << "|(1<<TID::Compare)";
   if (Inst.isMoveImm)          OS << "|(1<<TID::MoveImm)";
+  if (Inst.isBitcast)          OS << "|(1<<TID::Bitcast)";
   if (Inst.isBarrier)          OS << "|(1<<TID::Barrier)";
   if (Inst.hasDelaySlot)       OS << "|(1<<TID::DelaySlot)";
   if (Inst.isCall)             OS << "|(1<<TID::Call)";
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index c40a39d..6572595 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -3028,6 +3028,8 @@ void CheckDriverData(DriverData& Data) {
   FilterNotInGraph(Data.Edges, Data.ToolDescs);
 
   // Typecheck the compilation graph.
+  // TODO: use a genuine graph representation instead of a vector and check for
+  // multiple edges.
   TypecheckGraph(Data.Edges, Data.ToolDescs);
 
   // Check that there are no options without side effects (specified
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 64224d9..123abef 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -462,9 +462,34 @@ static std::string MangleName(const std::string &name, StringRef typestr,
   return s;
 }
 
+/// UseMacro - Examine the prototype string to determine if the intrinsic
+/// should be defined as a preprocessor macro instead of an inline function.
+static bool UseMacro(const std::string &proto) {
+  // If this builtin takes an immediate argument, we need to #define it rather
+  // than use a standard declaration, so that SemaChecking can range check
+  // the immediate passed by the user.
+  if (proto.find('i') != std::string::npos)
+    return true;
+
+  // Pointer arguments need to use macros to avoid hiding aligned attributes
+  // from the pointer type.
+  if (proto.find('p') != std::string::npos ||
+      proto.find('c') != std::string::npos)
+    return true;
+
+  return false;
+}
+
+/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
+/// defined as a macro should be accessed directly instead of being first
+/// assigned to a local temporary.
+static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
+  return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
+}
+
 // Generate the string "(argtype a, argtype b, ...)"
 static std::string GenArgs(const std::string &proto, StringRef typestr) {
-  bool define = proto.find('i') != std::string::npos;
+  bool define = UseMacro(proto);
   char arg = 'a';
 
   std::string s;
@@ -472,10 +497,10 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) {
 
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
     if (define) {
-      // Immediate macro arguments are used directly instead of being assigned
+      // Some macro arguments are used directly instead of being assigned
       // to local temporaries; prepend an underscore prefix to make their
       // names consistent with the local temporaries.
-      if (proto[i] == 'i')
+      if (MacroArgUsedDirectly(proto, i))
         s += "__";
     } else {
       s += TypeString(proto[i], typestr) + " __";
@@ -494,11 +519,28 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) {
 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
   char arg = 'a';
   std::string s;
+  bool generatedLocal = false;
 
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
     // Do not create a temporary for an immediate argument.
     // That would defeat the whole point of using a macro!
-    if (proto[i] == 'i') continue;
+    if (proto[i] == 'i')
+      continue;
+    generatedLocal = true;
+
+    // For other (non-immediate) arguments that are used directly, a local
+    // temporary is still needed to get the correct type checking, even though
+    // that temporary is not used for anything.
+    if (MacroArgUsedDirectly(proto, i)) {
+      s += TypeString(proto[i], typestr) + " __";
+      s.push_back(arg);
+      s += "_ = (__";
+      s.push_back(arg);
+      s += "); (void)__";
+      s.push_back(arg);
+      s += "_; ";
+      continue;
+    }
 
     s += TypeString(proto[i], typestr) + " __";
     s.push_back(arg);
@@ -507,7 +549,8 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
     s += "); ";
   }
 
-  s += "\\\n  ";
+  if (generatedLocal)
+    s += "\\\n  ";
   return s;
 }
 
@@ -568,11 +611,7 @@ static std::string GenOpString(OpKind op, const std::string &proto,
                                StringRef typestr) {
   bool quad;
   unsigned nElts = GetNumElements(typestr, quad);
-
-  // If this builtin takes an immediate argument, we need to #define it rather
-  // than use a standard declaration, so that SemaChecking can range check
-  // the immediate passed by the user.
-  bool define = proto.find('i') != std::string::npos;
+  bool define = UseMacro(proto);
 
   std::string ts = TypeString(proto[0], typestr);
   std::string s;
@@ -608,16 +647,9 @@ static std::string GenOpString(OpKind op, const std::string &proto,
   case OpMul:
     s += "__a * __b;";
     break;
-  case OpMullN:
-    s += Extend(typestr, "__a") + " * " +
-      Extend(typestr, Duplicate(nElts << (int)quad, typestr, "__b")) + ";";
-    break;
   case OpMullLane:
-    s += Extend(typestr, "__a") + " * " +
-      Extend(typestr, SplatLane(nElts, "__b", "__c")) + ";";
-    break;
-  case OpMull:
-    s += Extend(typestr, "__a") + " * " + Extend(typestr, "__b") + ";";
+    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
+      SplatLane(nElts, "__b", "__c") + ");";
     break;
   case OpMlaN:
     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
@@ -629,16 +661,15 @@ static std::string GenOpString(OpKind op, const std::string &proto,
     s += "__a + (__b * __c);";
     break;
   case OpMlalN:
-    s += "__a + (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, Duplicate(nElts, typestr, "__c")) + ");";
+    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
+      Duplicate(nElts, typestr, "__c") + ");";
     break;
   case OpMlalLane:
-    s += "__a + (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, SplatLane(nElts, "__c", "__d")) + ");";
+    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
+      SplatLane(nElts, "__c", "__d") + ");";
     break;
   case OpMlal:
-    s += "__a + (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, "__c") + ");";
+    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
     break;
   case OpMlsN:
     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
@@ -650,16 +681,15 @@ static std::string GenOpString(OpKind op, const std::string &proto,
     s += "__a - (__b * __c);";
     break;
   case OpMlslN:
-    s += "__a - (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, Duplicate(nElts, typestr, "__c")) + ");";
+    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
+      Duplicate(nElts, typestr, "__c") + ");";
     break;
   case OpMlslLane:
-    s += "__a - (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, SplatLane(nElts, "__c", "__d")) + ");";
+    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
+      SplatLane(nElts, "__c", "__d") + ");";
     break;
   case OpMlsl:
-    s += "__a - (" + Extend(typestr, "__b") + " * " +
-      Extend(typestr, "__c") + ");";
+    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
     break;
   case OpQDMullLane:
     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
@@ -867,10 +897,7 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
   // sret-like argument.
   bool sret = (proto[0] >= '2' && proto[0] <= '4');
 
-  // If this builtin takes an immediate argument, we need to #define it rather
-  // than use a standard declaration, so that SemaChecking can range check
-  // the immediate passed by the user.
-  bool define = proto.find('i') != std::string::npos;
+  bool define = UseMacro(proto);
 
   // Check if the prototype has a scalar operand with the type of the vector
   // elements.  If not, bitcasting the args will take care of arg checking.
@@ -1008,7 +1035,7 @@ static std::string GenIntrinsic(const std::string &name,
                                 StringRef outTypeStr, StringRef inTypeStr,
                                 OpKind kind, ClassKind classKind) {
   assert(!proto.empty() && "");
-  bool define = proto.find('i') != std::string::npos;
+  bool define = UseMacro(proto);
   std::string s;
 
   // static always inline + return type
@@ -1148,17 +1175,20 @@ void NeonEmitter::run(raw_ostream &OS) {
 
   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
 
-  // Emit vmovl and vabd intrinsics first so they can be used by other
+  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
   // intrinsics.  (Some of the saturating multiply instructions are also
   // used to implement the corresponding "_lane" variants, but tablegen
   // sorts the records into alphabetical order so that the "_lane" variants
   // come after the intrinsics they use.)
   emitIntrinsic(OS, Records.getDef("VMOVL"));
+  emitIntrinsic(OS, Records.getDef("VMULL"));
   emitIntrinsic(OS, Records.getDef("VABD"));
 
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
-    if (R->getName() != "VMOVL" && R->getName() != "VABD")
+    if (R->getName() != "VMOVL" &&
+        R->getName() != "VMULL" &&
+        R->getName() != "VABD")
       emitIntrinsic(OS, R);
   }
 
diff --git a/utils/TableGen/NeonEmitter.h b/utils/TableGen/NeonEmitter.h
index 1e6fcbf..12e4e86 100644
--- a/utils/TableGen/NeonEmitter.h
+++ b/utils/TableGen/NeonEmitter.h
@@ -30,13 +30,11 @@ enum OpKind {
   OpSubl,
   OpSubw,
   OpMul,
-  OpMull,
   OpMla,
   OpMlal,
   OpMls,
   OpMlsl,
   OpMulN,
-  OpMullN,
   OpMlaN,
   OpMlsN,
   OpMlalN,
@@ -105,13 +103,11 @@ namespace llvm {
       OpMap["OP_SUBL"]  = OpSubl;
       OpMap["OP_SUBW"]  = OpSubw;
       OpMap["OP_MUL"]   = OpMul;
-      OpMap["OP_MULL"]  = OpMull;
       OpMap["OP_MLA"]   = OpMla;
       OpMap["OP_MLAL"]  = OpMlal;
       OpMap["OP_MLS"]   = OpMls;
       OpMap["OP_MLSL"]  = OpMlsl;
       OpMap["OP_MUL_N"] = OpMulN;
-      OpMap["OP_MULL_N"]= OpMullN;
       OpMap["OP_MLA_N"] = OpMlaN;
       OpMap["OP_MLS_N"] = OpMlsN;
       OpMap["OP_MLAL_N"] = OpMlalN;
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
index 6892912..431026c 100644
--- a/utils/TableGen/OptParserEmitter.cpp
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -35,7 +35,7 @@ static int CompareOptionRecords(const void *Av, const void *Bv) {
   const Record *A = *(Record**) Av;
   const Record *B = *(Record**) Bv;
 
-  // Sentinel options preceed all others and are only ordered by precedence.
+  // Sentinel options precede all others and are only ordered by precedence.
   bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
   bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
   if (ASent != BSent)
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index f3a5df2..522b719 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -707,7 +707,7 @@ class CodeInit : public Init {
 public:
   explicit CodeInit(const std::string &V) : Value(V) {}
 
-  const std::string getValue() const { return Value; }
+  const std::string &getValue() const { return Value; }
 
   virtual Init *convertInitializerTo(RecTy *Ty) {
     return Ty->convertValue(this);
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 96399a4..4ddc47d 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -38,7 +38,10 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
   OS << "enum {\n  NoRegister,\n";
 
   for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-    OS << "  " << Registers[i].getName() << ", \t// " << i+1 << "\n";
+    OS << "  " << Registers[i].getName() << " = " <<
+      Registers[i].EnumValue << ",\n";
+  assert(Registers.size() == Registers[Registers.size()-1].EnumValue &&
+         "Register enum value mismatch!");
   OS << "  NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
   OS << "};\n";
   if (!Namespace.empty())
@@ -91,7 +94,7 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
   if (!RegisterClasses.empty()) {
     OS << "namespace " << RegisterClasses[0].Namespace
        << " { // Register classes\n";
-       
+
     OS << "  enum {\n";
     for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
       if (i) OS << ",\n";
@@ -358,7 +361,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
 
     // Give the register class a legal C name if it's anonymous.
     std::string Name = RC.TheDef->getName();
-  
+
     // Emit the register list now.
     OS << "  // " << Name << " Register Class...\n"
        << "  static const unsigned " << Name
@@ -376,12 +379,12 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // Emit the ValueType arrays for each RegisterClass
   for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
     const CodeGenRegisterClass &RC = RegisterClasses[rc];
-    
+
     // Give the register class a legal C name if it's anonymous.
     std::string Name = RC.TheDef->getName() + "VTs";
-    
+
     // Emit the register list now.
-    OS << "  // " << Name 
+    OS << "  // " << Name
        << " Register Class Value Types...\n"
        << "  static const EVT " << Name
        << "[] = {\n    ";
@@ -390,7 +393,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     OS << "MVT::Other\n  };\n\n";
   }
   OS << "}  // end anonymous namespace\n\n";
-  
+
   // Now that all of the structs have been emitted, emit the instances.
   if (!RegisterClasses.empty()) {
     OS << "namespace " << RegisterClasses[0].Namespace
@@ -398,7 +401,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i)
       OS << "  " << RegisterClasses[i].getName()  << "Class\t"
          << RegisterClasses[i].getName() << "RegClass;\n";
-         
+
     std::map<unsigned, std::set<unsigned> > SuperClassMap;
     std::map<unsigned, std::set<unsigned> > SuperRegClassMap;
     OS << "\n";
@@ -488,7 +491,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
       // Give the register class a legal C name if it's anonymous.
       std::string Name = RC.TheDef->getName();
 
-      OS << "  // " << Name 
+      OS << "  // " << Name
          << " Register Class sub-classes...\n"
          << "  static const TargetRegisterClass* const "
          << Name << "Subclasses[] = {\n    ";
@@ -500,7 +503,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
         // Sub-classes are used to determine if a virtual register can be used
         // as an instruction operand, or if it must be copied first.
         if (rc == rc2 || !RC.hasSubClass(&RC2)) continue;
-      
+
         if (!Empty) OS << ", ";
         OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass";
         Empty = false;
@@ -524,7 +527,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
       // Give the register class a legal C name if it's anonymous.
       std::string Name = RC.TheDef->getName();
 
-      OS << "  // " << Name 
+      OS << "  // " << Name
          << " Register Class super-classes...\n"
          << "  static const TargetRegisterClass* const "
          << Name << "Superclasses[] = {\n    ";
@@ -538,7 +541,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
           const CodeGenRegisterClass &RC2 = RegisterClasses[*II];
           if (!Empty) OS << ", ";
           OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass";
-          Empty = false;        
+          Empty = false;
         }
       }
 
@@ -550,7 +553,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
       const CodeGenRegisterClass &RC = RegisterClasses[i];
       OS << RC.MethodBodies << "\n";
-      OS << RC.getName() << "Class::" << RC.getName() 
+      OS << RC.getName() << "Class::" << RC.getName()
          << "Class()  : TargetRegisterClass("
          << RC.getName() + "RegClassID" << ", "
          << '\"' << RC.getName() << "\", "
@@ -567,7 +570,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
          << RC.getName() << ", " << RC.getName() << " + " << RC.Elements.size()
          << ") {}\n";
     }
-  
+
     OS << "}\n";
   }
 
@@ -584,7 +587,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   std::map<Record*, std::set<Record*>, LessRecord> RegisterAliases;
   typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
   DwarfRegNumsMapTy DwarfRegNums;
-  
+
   const std::vector<CodeGenRegister> &Regs = Target.getRegisters();
 
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -623,7 +626,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
                      RegisterAliases);
     }
   }
-  
+
   // Print the SubregHashTable, a simple quadratically probed
   // hash table for determining if a register is a subregister
   // of another register.
@@ -633,13 +636,13 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     RegNo[Regs[i].TheDef] = i;
     NumSubRegs += RegisterSubRegs[Regs[i].TheDef].size();
   }
-  
+
   unsigned SubregHashTableSize = 2 * NextPowerOf2(2 * NumSubRegs);
   unsigned* SubregHashTable = new unsigned[2 * SubregHashTableSize];
   std::fill(SubregHashTable, SubregHashTable + 2 * SubregHashTableSize, ~0U);
-  
+
   unsigned hashMisses = 0;
-  
+
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
     Record* R = Regs[i].TheDef;
     for (std::set<Record*>::iterator I = RegisterSubRegs[R].begin(),
@@ -654,26 +657,26 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
              SubregHashTable[index*2+1] != ~0U) {
         index = (index + ProbeAmt) & (SubregHashTableSize-1);
         ProbeAmt += 2;
-        
+
         hashMisses++;
       }
-      
+
       SubregHashTable[index*2] = i;
       SubregHashTable[index*2+1] = RegNo[RJ];
     }
   }
-  
+
   OS << "\n\n  // Number of hash collisions: " << hashMisses << "\n";
-  
+
   if (SubregHashTableSize) {
     std::string Namespace = Regs[0].TheDef->getValueAsString("Namespace");
-    
+
     OS << "  const unsigned SubregHashTable[] = { ";
     for (unsigned i = 0; i < SubregHashTableSize - 1; ++i) {
       if (i != 0)
         // Insert spaces for nice formatting.
         OS << "                                       ";
-      
+
       if (SubregHashTable[2*i] != ~0U) {
         OS << getQualifiedName(Regs[SubregHashTable[2*i]].TheDef) << ", "
            << getQualifiedName(Regs[SubregHashTable[2*i+1]].TheDef) << ", \n";
@@ -681,7 +684,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
         OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister, \n";
       }
     }
-    
+
     unsigned Idx = SubregHashTableSize*2-2;
     if (SubregHashTable[Idx] != ~0U) {
       OS << "                                       "
@@ -690,14 +693,14 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     } else {
       OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister };\n";
     }
-    
+
     OS << "  const unsigned SubregHashTableSize = "
        << SubregHashTableSize << ";\n";
   } else {
     OS << "  const unsigned SubregHashTable[] = { ~0U, ~0U };\n"
        << "  const unsigned SubregHashTableSize = 1;\n";
   }
-  
+
   delete [] SubregHashTable;
 
 
@@ -709,13 +712,13 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     RegNo[Regs[i].TheDef] = i;
     NumAliases += RegisterAliases[Regs[i].TheDef].size();
   }
-  
+
   unsigned AliasesHashTableSize = 2 * NextPowerOf2(2 * NumAliases);
   unsigned* AliasesHashTable = new unsigned[2 * AliasesHashTableSize];
   std::fill(AliasesHashTable, AliasesHashTable + 2 * AliasesHashTableSize, ~0U);
-  
+
   hashMisses = 0;
-  
+
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
     Record* R = Regs[i].TheDef;
     for (std::set<Record*>::iterator I = RegisterAliases[R].begin(),
@@ -730,26 +733,26 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
              AliasesHashTable[index*2+1] != ~0U) {
         index = (index + ProbeAmt) & (AliasesHashTableSize-1);
         ProbeAmt += 2;
-        
+
         hashMisses++;
       }
-      
+
       AliasesHashTable[index*2] = i;
       AliasesHashTable[index*2+1] = RegNo[RJ];
     }
   }
-  
+
   OS << "\n\n  // Number of hash collisions: " << hashMisses << "\n";
-  
+
   if (AliasesHashTableSize) {
     std::string Namespace = Regs[0].TheDef->getValueAsString("Namespace");
-    
+
     OS << "  const unsigned AliasesHashTable[] = { ";
     for (unsigned i = 0; i < AliasesHashTableSize - 1; ++i) {
       if (i != 0)
         // Insert spaces for nice formatting.
         OS << "                                       ";
-      
+
       if (AliasesHashTable[2*i] != ~0U) {
         OS << getQualifiedName(Regs[AliasesHashTable[2*i]].TheDef) << ", "
            << getQualifiedName(Regs[AliasesHashTable[2*i+1]].TheDef) << ", \n";
@@ -757,7 +760,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
         OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister, \n";
       }
     }
-    
+
     unsigned Idx = AliasesHashTableSize*2-2;
     if (AliasesHashTable[Idx] != ~0U) {
       OS << "                                       "
@@ -766,14 +769,14 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     } else {
       OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister };\n";
     }
-    
+
     OS << "  const unsigned AliasesHashTableSize = "
        << AliasesHashTableSize << ";\n";
   } else {
     OS << "  const unsigned AliasesHashTable[] = { ~0U, ~0U };\n"
        << "  const unsigned AliasesHashTableSize = 1;\n";
   }
-  
+
   delete [] AliasesHashTable;
 
   if (!RegisterAliases.empty())
@@ -838,7 +841,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   }
 
   OS<<"\n  const TargetRegisterDesc RegisterDescriptors[] = { // Descriptors\n";
-  OS << "    { \"NOREG\",\t0,\t0,\t0 },\n";
+  OS << "    { \"NOREG\",\t0,\t0,\t0,\t0 },\n";
 
   // Now that register alias and sub-registers sets have been emitted, emit the
   // register descriptors now.
@@ -851,9 +854,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     else
       OS << "Empty_SubRegsSet,\t";
     if (!RegisterSuperRegs[Reg.TheDef].empty())
-      OS << Reg.getName() << "_SuperRegsSet },\n";
+      OS << Reg.getName() << "_SuperRegsSet,\t";
     else
-      OS << "Empty_SuperRegsSet },\n";
+      OS << "Empty_SuperRegsSet,\t";
+    OS << Reg.CostPerUse << " },\n";
   }
   OS << "  };\n";      // End of register descriptors...
 
@@ -966,7 +970,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   }
 
   // Now we know maximal length of number list. Append -1's, where needed
-  for (DwarfRegNumsMapTy::iterator 
+  for (DwarfRegNumsMapTy::iterator
        I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I)
     for (unsigned i = I->second.size(), e = maxLength; i != e; ++i)
       I->second.push_back(-1);
@@ -978,18 +982,18 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
      << "  default:\n"
      << "    assert(0 && \"Unknown DWARF flavour\");\n"
      << "    return -1;\n";
-  
+
   for (unsigned i = 0, e = maxLength; i != e; ++i) {
     OS << "  case " << i << ":\n"
        << "    switch (RegNum) {\n"
        << "    default:\n"
        << "      assert(0 && \"Invalid RegNum\");\n"
        << "      return -1;\n";
-    
+
     // Sort by name to get a stable order.
-    
 
-    for (DwarfRegNumsMapTy::iterator 
+
+    for (DwarfRegNumsMapTy::iterator
            I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
       int RegNo = I->second[i];
       if (RegNo != -2)
@@ -1002,7 +1006,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
     }
     OS << "    };\n";
   }
-    
+
   OS << "  };\n}\n\n";
 
   OS << "} // End llvm namespace \n";
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index e35bdca..928fa4b 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -31,24 +31,30 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS,
 
   // Open enumeration
   OS << "enum {\n";
-  
+
   // For each record
-  for (unsigned i = 0, N = DefList.size(); i < N;) {
+  unsigned N = DefList.size();
+  if (N > 64) {
+    errs() << "Too many (> 64) subtarget features!\n";
+    exit(1);
+  }
+
+  for (unsigned i = 0; i < N;) {
     // Next record
     Record *Def = DefList[i];
-    
+
     // Get and emit name
     OS << "  " << Def->getName();
-    
+
     // If bit flags then emit expression (1 << i)
-    if (isBits)  OS << " = " << " 1 << " << i;
+    if (isBits)  OS << " = " << " 1ULL << " << i;
 
     // Depending on 'if more in the list' emit comma
     if (++i < N) OS << ",";
-    
+
     OS << "\n";
   }
-  
+
   // Close enumeration
   OS << "};\n";
 }
@@ -66,7 +72,7 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
   // Begin feature table
   OS << "// Sorted (by key) array of values for CPU features.\n"
      << "static const llvm::SubtargetFeatureKV FeatureKV[] = {\n";
-  
+
   // For each feature
   for (unsigned i = 0, N = FeatureList.size(); i < N; ++i) {
     // Next feature
@@ -75,20 +81,20 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
     const std::string &Name = Feature->getName();
     const std::string &CommandLineName = Feature->getValueAsString("Name");
     const std::string &Desc = Feature->getValueAsString("Desc");
-    
+
     if (CommandLineName.empty()) continue;
-    
+
     // Emit as { "feature", "description", featureEnum, i1 | i2 | ... | in }
     OS << "  { "
        << "\"" << CommandLineName << "\", "
        << "\"" << Desc << "\", "
        << Name << ", ";
 
-    const std::vector<Record*> &ImpliesList = 
+    const std::vector<Record*> &ImpliesList =
       Feature->getValueAsListOfDefs("Implies");
-    
+
     if (ImpliesList.empty()) {
-      OS << "0";
+      OS << "0ULL";
     } else {
       for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
         OS << ImpliesList[j]->getName();
@@ -97,13 +103,13 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
     }
 
     OS << " }";
-    
+
     // Depending on 'if more in the list' emit comma
     if ((i + 1) < N) OS << ",";
-    
+
     OS << "\n";
   }
-  
+
   // End feature table
   OS << "};\n";
 
@@ -126,39 +132,39 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
   // Begin processor table
   OS << "// Sorted (by key) array of values for CPU subtype.\n"
      << "static const llvm::SubtargetFeatureKV SubTypeKV[] = {\n";
-     
+
   // For each processor
   for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
     // Next processor
     Record *Processor = ProcessorList[i];
 
     const std::string &Name = Processor->getValueAsString("Name");
-    const std::vector<Record*> &FeatureList = 
+    const std::vector<Record*> &FeatureList =
       Processor->getValueAsListOfDefs("Features");
-    
+
     // Emit as { "cpu", "description", f1 | f2 | ... fn },
     OS << "  { "
        << "\"" << Name << "\", "
        << "\"Select the " << Name << " processor\", ";
-    
+
     if (FeatureList.empty()) {
-      OS << "0";
+      OS << "0ULL";
     } else {
       for (unsigned j = 0, M = FeatureList.size(); j < M;) {
         OS << FeatureList[j]->getName();
         if (++j < M) OS << " | ";
       }
     }
-    
+
     // The "0" is for the "implies" section of this data structure.
-    OS << ", 0 }";
-    
+    OS << ", 0ULL }";
+
     // Depending on 'if more in the list' emit comma
     if (++i < N) OS << ",";
-    
+
     OS << "\n";
   }
-  
+
   // End processor table
   OS << "};\n";
 
@@ -185,7 +191,7 @@ CollectAllItinClasses(raw_ostream &OS,
     // Assign itinerary class a unique number
     ItinClassesMap[ItinClass->getName()] = i;
   }
-  
+
   // Emit size of table
   OS<<"\nenum {\n";
   OS<<"  ItinClassesSize = " << N << "\n";
@@ -213,21 +219,21 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
   for (unsigned i = 0; i < N;) {
     // Next stage
     const Record *Stage = StageList[i];
-  
+
     // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc, kind }
     int Cycles = Stage->getValueAsInt("Cycles");
     ItinString += "  { " + itostr(Cycles) + ", ";
-    
+
     // Get unit list
     const std::vector<Record*> &UnitList = Stage->getValueAsListOfDefs("Units");
-    
+
     // For each unit
     for (unsigned j = 0, M = UnitList.size(); j < M;) {
       // Add name and bitwise or
       ItinString += Name + "FU::" + UnitList[j]->getName();
       if (++j < M) ItinString += " | ";
     }
-    
+
     int TimeInc = Stage->getValueAsInt("TimeInc");
     ItinString += ", " + itostr(TimeInc);
 
@@ -256,7 +262,7 @@ void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData,
   for (unsigned i = 0; i < N;) {
     // Next operand cycle
     const int OCycle = OperandCycleList[i];
-  
+
     ItinString += "  " + itostr(OCycle);
     if (++i < N) ItinString += ", ";
   }
@@ -292,7 +298,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   // Gather processor iteraries
   std::vector<Record*> ProcItinList =
                        Records.getAllDerivedDefinitions("ProcessorItineraries");
-  
+
   // If just no itinerary then don't bother
   if (ProcItinList.size() < 2) return;
 
@@ -332,7 +338,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   // Begin stages table
   std::string StageTable = "\nstatic const llvm::InstrStage Stages[] = {\n";
   StageTable += "  { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
-        
+
   // Begin operand cycle table
   std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
   OperandCycleTable += "  0, // No itinerary\n";
@@ -340,32 +346,31 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   // Begin pipeline bypass table
   std::string BypassTable = "static const unsigned ForwardingPathes[] = {\n";
   BypassTable += "  0, // No itinerary\n";
-        
+
   unsigned StageCount = 1, OperandCycleCount = 1;
-  unsigned ItinStageEnum = 1, ItinOperandCycleEnum = 1;
   std::map<std::string, unsigned> ItinStageMap, ItinOperandMap;
   for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
     // Next record
     Record *Proc = ProcItinList[i];
-    
+
     // Get processor itinerary name
     const std::string &Name = Proc->getName();
-    
+
     // Skip default
     if (Name == "NoItineraries") continue;
-    
+
     // Create and expand processor itinerary to cover all itinerary classes
     std::vector<InstrItinerary> ItinList;
     ItinList.resize(NItinClasses);
-    
+
     // Get itinerary data list
     std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
-    
+
     // For each itinerary data
     for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
       // Next itinerary data
       Record *ItinData = ItinDataList[j];
-      
+
       // Get string and stage count
       std::string ItinStageString;
       unsigned NStages;
@@ -386,15 +391,17 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
       if (NStages > 0) {
         FindStage = ItinStageMap[ItinStageString];
         if (FindStage == 0) {
-          // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
-          StageTable += ItinStageString + ", // " + itostr(ItinStageEnum) + "\n";
+          // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // indices
+          StageTable += ItinStageString + ", // " + itostr(StageCount);
+          if (NStages > 1)
+            StageTable += "-" + itostr(StageCount + NStages - 1);
+          StageTable += "\n";
           // Record Itin class number.
           ItinStageMap[ItinStageString] = FindStage = StageCount;
           StageCount += NStages;
-          ItinStageEnum++;
         }
       }
-      
+
       // Check to see if operand cycle already exists and create if it doesn't
       unsigned FindOperandCycle = 0;
       if (NOperandCycles > 0) {
@@ -402,25 +409,25 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
         FindOperandCycle = ItinOperandMap[ItinOperandString];
         if (FindOperandCycle == 0) {
           // Emit as  cycle, // index
-          OperandCycleTable += ItinOperandCycleString + ", // " + 
-            itostr(ItinOperandCycleEnum) + "\n";
+          OperandCycleTable += ItinOperandCycleString + ", // ";
+          std::string OperandIdxComment = itostr(OperandCycleCount);
+          if (NOperandCycles > 1)
+            OperandIdxComment += "-"
+              + itostr(OperandCycleCount + NOperandCycles - 1);
+          OperandCycleTable += OperandIdxComment + "\n";
           // Record Itin class number.
-          ItinOperandMap[ItinOperandCycleString] = 
+          ItinOperandMap[ItinOperandCycleString] =
             FindOperandCycle = OperandCycleCount;
-
           // Emit as bypass, // index
-          BypassTable += ItinBypassString + ", // " + 
-            itostr(ItinOperandCycleEnum) + "\n";
-
+          BypassTable += ItinBypassString + ", // " + OperandIdxComment + "\n";
           OperandCycleCount += NOperandCycles;
-          ItinOperandCycleEnum++;
         }
       }
-      
+
       // Locate where to inject into processor itinerary table
       const std::string &Name = ItinData->getValueAsDef("TheClass")->getName();
       unsigned Find = ItinClassesMap[Name];
-      
+
       // Set up itinerary as location and location + stage count
       unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps");
       InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
@@ -430,7 +437,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
       // Inject - empty slots will be 0, 0
       ItinList[Find] = Intinerary;
     }
-    
+
     // Add process itinerary to list
     ProcList.push_back(ItinList);
   }
@@ -450,7 +457,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   OS << StageTable;
   OS << OperandCycleTable;
   OS << BypassTable;
-  
+
   // Emit size of tables
   OS<<"\nenum {\n";
   OS<<"  StagesSize = sizeof(Stages)/sizeof(llvm::InstrStage),\n";
@@ -461,12 +468,14 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
 //
 // EmitProcessorData - Generate data for processor itineraries.
 //
-void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
-      std::vector<std::vector<InstrItinerary> > &ProcList) {
+void SubtargetEmitter::
+EmitProcessorData(raw_ostream &OS,
+                  std::vector<Record*> &ItinClassList,
+                  std::vector<std::vector<InstrItinerary> > &ProcList) {
   // Get an iterator for processor itinerary stages
   std::vector<std::vector<InstrItinerary> >::iterator
       ProcListIter = ProcList.begin();
-  
+
   // For each processor itinerary
   std::vector<Record*> Itins =
                        Records.getAllDerivedDefinitions("ProcessorItineraries");
@@ -476,35 +485,36 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
 
     // Get processor itinerary name
     const std::string &Name = Itin->getName();
-    
+
     // Skip default
     if (Name == "NoItineraries") continue;
 
     // Begin processor itinerary table
     OS << "\n";
     OS << "static const llvm::InstrItinerary " << Name << "[] = {\n";
-    
+
     // For each itinerary class
     std::vector<InstrItinerary> &ItinList = *ProcListIter++;
+    assert(ItinList.size() == ItinClassList.size() && "bad itinerary");
     for (unsigned j = 0, M = ItinList.size(); j < M; ++j) {
       InstrItinerary &Intinerary = ItinList[j];
-      
-      // Emit in the form of 
+
+      // Emit in the form of
       // { firstStage, lastStage, firstCycle, lastCycle } // index
       if (Intinerary.FirstStage == 0) {
         OS << "  { 1, 0, 0, 0, 0 }";
       } else {
         OS << "  { " <<
           Intinerary.NumMicroOps << ", " <<
-          Intinerary.FirstStage << ", " << 
-          Intinerary.LastStage << ", " << 
-          Intinerary.FirstOperandCycle << ", " << 
+          Intinerary.FirstStage << ", " <<
+          Intinerary.LastStage << ", " <<
+          Intinerary.FirstOperandCycle << ", " <<
           Intinerary.LastOperandCycle << " }";
       }
-      
-      OS << ", // " << j << "\n";
+
+      OS << ", // " << j << " " << ItinClassList[j]->getName() << "\n";
     }
-    
+
     // End processor itinerary table
     OS << "  { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n";
     OS << "};\n";
@@ -524,7 +534,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
   OS << "\n";
   OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
      << "static const llvm::SubtargetInfoKV ProcItinKV[] = {\n";
-     
+
   // For each processor
   for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
     // Next processor
@@ -533,20 +543,20 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
     const std::string &Name = Processor->getValueAsString("Name");
     const std::string &ProcItin =
       Processor->getValueAsDef("ProcItin")->getName();
-    
+
     // Emit as { "cpu", procinit },
     OS << "  { "
        << "\"" << Name << "\", "
        << "(void *)&" << ProcItin;
-        
+
     OS << " }";
-    
+
     // Depending on ''if more in the list'' emit comma
     if (++i < N) OS << ",";
-    
+
     OS << "\n";
   }
-  
+
   // End processor table
   OS << "};\n";
 
@@ -566,20 +576,20 @@ void SubtargetEmitter::EmitData(raw_ostream &OS) {
   std::vector<Record*> ItinClassList =
     Records.getAllDerivedDefinitions("InstrItinClass");
   std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
-  
+
   // Enumerate all the itinerary classes
   unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap,
                                                 ItinClassList);
   // Make sure the rest is worth the effort
   HasItineraries = NItinClasses != 1;   // Ignore NoItinerary.
-  
+
   if (HasItineraries) {
     std::vector<std::vector<InstrItinerary> > ProcList;
     // Emit the stage data
     EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap,
                                  ItinClassList, ProcList);
     // Emit the processor itinerary data
-    EmitProcessorData(OS, ProcList);
+    EmitProcessorData(OS, ItinClassList, ProcList);
     // Emit the processor lookup data
     EmitProcessorLookup(OS);
   }
@@ -594,8 +604,8 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
                        Records.getAllDerivedDefinitions("SubtargetFeature");
   std::sort(Features.begin(), Features.end(), LessRecord());
 
-  OS << "// ParseSubtargetFeatures - Parses features string setting specified\n" 
-     << "// subtarget options.\n" 
+  OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"
+     << "// subtarget options.\n"
      << "std::string llvm::";
   OS << Target;
   OS << "Subtarget::ParseSubtargetFeatures(const std::string &FS,\n"
@@ -604,7 +614,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
      << "  DEBUG(dbgs() << \"\\nCPU:\" << CPU);\n"
      << "  SubtargetFeatures Features(FS);\n"
      << "  Features.setCPUIfNone(CPU);\n"
-     << "  uint32_t Bits =  Features.getBits(SubTypeKV, SubTypeKVSize,\n"
+     << "  uint64_t Bits =  Features.getBits(SubTypeKV, SubTypeKVSize,\n"
      << "                                    FeatureKV, FeatureKVSize);\n";
 
   for (unsigned i = 0; i < Features.size(); i++) {
@@ -618,7 +628,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
       OS << "  if ((Bits & " << Instance << ") != 0) "
          << Attribute << " = " << Value << ";\n";
     else
-      OS << "  if ((Bits & " << Instance << ") != 0 && " << Attribute << 
+      OS << "  if ((Bits & " << Instance << ") != 0 && " << Attribute <<
             " < " << Value << ") " << Attribute << " = " << Value << ";\n";
   }
 
diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h
index 3abec3b..93055b7 100644
--- a/utils/TableGen/SubtargetEmitter.h
+++ b/utils/TableGen/SubtargetEmitter.h
@@ -24,11 +24,11 @@
 namespace llvm {
 
 class SubtargetEmitter : public TableGenBackend {
-  
+
   RecordKeeper &Records;
   std::string Target;
   bool HasItineraries;
-  
+
   void Enumeration(raw_ostream &OS, const char *ClassName, bool isBits);
   void FeatureKeyValues(raw_ostream &OS);
   void CPUKeyValues(raw_ostream &OS);
@@ -48,11 +48,12 @@ class SubtargetEmitter : public TableGenBackend {
                      std::vector<Record*> &ItinClassList,
                      std::vector<std::vector<InstrItinerary> > &ProcList);
   void EmitProcessorData(raw_ostream &OS,
-                       std::vector<std::vector<InstrItinerary> > &ProcList);
+                         std::vector<Record*> &ItinClassList,
+                         std::vector<std::vector<InstrItinerary> > &ProcList);
   void EmitProcessorLookup(raw_ostream &OS);
   void EmitData(raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS);
-  
+
 public:
   SubtargetEmitter(RecordKeeper &R) : Records(R), HasItineraries(false) {}
 
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index 55a6c5d..e1aa5a7 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -15,7 +15,6 @@
 #define TGLEXER_H
 
 #include "llvm/Support/DataTypes.h"
-#include <vector>
 #include <string>
 #include <cassert>
 
@@ -36,7 +35,7 @@ namespace tgtok {
     l_brace, r_brace,   // { }
     l_paren, r_paren,   // ( )
     less, greater,      // < >
-    colon, semi,        // ; :
+    colon, semi,        // : ;
     comma, period,      // , .
     equal, question,    // = ?
     
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index f6041be..59097f9 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -1153,6 +1153,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
         s << "Type mismatch for list, expected list type, got "
           << ItemType->getAsString();
         TokError(s.str());
+        return 0;
       }
       GivenListTy = ListType;
     }
diff --git a/utils/TableGen/TGValueTypes.cpp b/utils/TableGen/TGValueTypes.cpp
index 122d085..af0d9f4 100644
--- a/utils/TableGen/TGValueTypes.cpp
+++ b/utils/TableGen/TGValueTypes.cpp
@@ -16,7 +16,6 @@
 
 #include "llvm/CodeGen/ValueTypes.h"
 #include <map>
-#include <vector>
 using namespace llvm;
 
 namespace llvm {
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 3b7dc01..aa92302 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -65,6 +65,7 @@ enum ActionType {
   GenClangAttrSpellingList,
   GenClangDiagsDefs,
   GenClangDiagGroups,
+  GenClangDiagsIndexName,
   GenClangDeclNodes,
   GenClangStmtNodes,
   GenClangSACheckers,
@@ -133,12 +134,16 @@ namespace {
                                "Generate clang PCH attribute reader"),
                     clEnumValN(GenClangAttrPCHWrite, "gen-clang-attr-pch-write",
                                "Generate clang PCH attribute writer"),
-                    clEnumValN(GenClangAttrSpellingList, "gen-clang-attr-spelling-list",
+                    clEnumValN(GenClangAttrSpellingList,
+                               "gen-clang-attr-spelling-list",
                                "Generate a clang attribute spelling list"),
                     clEnumValN(GenClangDiagsDefs, "gen-clang-diags-defs",
                                "Generate Clang diagnostics definitions"),
                     clEnumValN(GenClangDiagGroups, "gen-clang-diag-groups",
                                "Generate Clang diagnostic groups"),
+                    clEnumValN(GenClangDiagsIndexName,
+                               "gen-clang-diags-index-name",
+                               "Generate Clang diagnostic name index"),
                     clEnumValN(GenClangDeclNodes, "gen-clang-decl-nodes",
                                "Generate Clang AST declaration nodes"),
                     clEnumValN(GenClangStmtNodes, "gen-clang-stmt-nodes",
@@ -295,6 +300,9 @@ int main(int argc, char **argv) {
     case GenClangDiagGroups:
       ClangDiagGroupsEmitter(Records).run(Out.os());
       break;
+    case GenClangDiagsIndexName:
+      ClangDiagsIndexNameEmitter(Records).run(Out.os());
+      break;
     case GenClangDeclNodes:
       ClangASTNodesEmitter(Records, "Decl", "Decl").run(Out.os());
       ClangDeclContextEmitter(Records).run(Out.os());
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 94797f5..7431059 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -18,6 +18,7 @@
 #include "X86DisassemblerTables.h"
 
 #include "TableGenBackend.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 
@@ -46,9 +47,11 @@ static inline bool inheritsFrom(InstructionContext child,
   case IC_OPSIZE:
     return(inheritsFrom(child, IC_64BIT_OPSIZE));
   case IC_XD:
-    return(inheritsFrom(child, IC_64BIT_XD));
+    return(inheritsFrom(child, IC_64BIT_XD) ||
+           inheritsFrom(child, IC_VEX_XD));
   case IC_XS:
-    return(inheritsFrom(child, IC_64BIT_XS));
+    return(inheritsFrom(child, IC_64BIT_XS) ||
+           inheritsFrom(child, IC_VEX_XS));
   case IC_64BIT_REXW:
     return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
            inheritsFrom(child, IC_64BIT_REXW_XD) ||
@@ -65,6 +68,35 @@ static inline bool inheritsFrom(InstructionContext child,
     return false;
   case IC_64BIT_REXW_OPSIZE:
     return false;
+  case IC_VEX:
+    return(inheritsFrom(child, IC_VEX_XS) ||
+           inheritsFrom(child, IC_VEX_XD) ||
+           inheritsFrom(child, IC_VEX_L) ||
+           inheritsFrom(child, IC_VEX_W) ||
+           inheritsFrom(child, IC_VEX_OPSIZE));
+  case IC_VEX_XS:
+    return(inheritsFrom(child, IC_VEX_L_XS) ||
+           inheritsFrom(child, IC_VEX_W_XS));
+  case IC_VEX_XD:
+    return(inheritsFrom(child, IC_VEX_L_XD) ||
+           inheritsFrom(child, IC_VEX_W_XD));
+  case IC_VEX_L:
+    return(inheritsFrom(child, IC_VEX_L_XS) ||
+           inheritsFrom(child, IC_VEX_L_XD));
+  case IC_VEX_L_XS:
+    return false;
+  case IC_VEX_L_XD:
+    return false;
+  case IC_VEX_W:
+    return(inheritsFrom(child, IC_VEX_W_XS) ||
+           inheritsFrom(child, IC_VEX_W_XD) ||
+           inheritsFrom(child, IC_VEX_W_OPSIZE));
+  case IC_VEX_W_XS:
+    return false;
+  case IC_VEX_W_XD:
+    return false;
+  case IC_VEX_OPSIZE:
+    return inheritsFrom(child, IC_VEX_W_OPSIZE);
   default:
     return false;
   }
@@ -236,7 +268,7 @@ static const char* stringForModifierType(ModifierType mt)
 DisassemblerTables::DisassemblerTables() {
   unsigned i;
   
-  for (i = 0; i < 4; i++) {
+  for (i = 0; i < array_lengthof(Tables); i++) {
     Tables[i] = new ContextDecision;
     memset(Tables[i], 0, sizeof(ContextDecision));
   }
@@ -247,7 +279,7 @@ DisassemblerTables::DisassemblerTables() {
 DisassemblerTables::~DisassemblerTables() {
   unsigned i;
   
-  for (i = 0; i < 4; i++)
+  for (i = 0; i < array_lengthof(Tables); i++)
     delete Tables[i];
 }
   
@@ -461,7 +493,29 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
   for (index = 0; index < 256; ++index) {
     o.indent(i * 2);
 
-    if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
+    if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
+      o << "IC_VEX_L_OPSIZE";
+    else if ((index & ATTR_VEXL) && (index & ATTR_XD))
+      o << "IC_VEX_L_XD";
+    else if ((index & ATTR_VEXL) && (index & ATTR_XS))
+      o << "IC_VEX_L_XS";
+    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
+      o << "IC_VEX_W_OPSIZE";
+    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD))
+      o << "IC_VEX_W_XD";
+    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS))
+      o << "IC_VEX_W_XS";
+    else if (index & ATTR_VEXL)
+      o << "IC_VEX_L";
+    else if ((index & ATTR_VEX) && (index & ATTR_REXW))
+      o << "IC_VEX_W";
+    else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE))
+      o << "IC_VEX_OPSIZE";
+    else if ((index & ATTR_VEX) && (index & ATTR_XD))
+      o << "IC_VEX_XD";
+    else if ((index & ATTR_VEX) && (index & ATTR_XS))
+      o << "IC_VEX_XS";
+    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
       o << "IC_64BIT_REXW_XS";
     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
       o << "IC_64BIT_REXW_XD";
@@ -484,6 +538,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
       o << "IC_XD";
     else if (index & ATTR_OPSIZE)
       o << "IC_OPSIZE";
+    else if (index & ATTR_VEX)
+      o << "IC_VEX";
     else
       o << "IC";
 
@@ -510,6 +566,8 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
   emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR);
   emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR);
   emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR);
+  emitContextDecision(o1, o2, i1, i2, *Tables[4], THREEBYTEA6_STR);
+  emitContextDecision(o1, o2, i1, i2, *Tables[5], THREEBYTEA7_STR);
 }
 
 void DisassemblerTables::emit(raw_ostream &o) const {
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index 08eba01..d16ebfc 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -39,7 +39,9 @@ private:
   /// [1] two-byte opcodes of the form 0f __
   /// [2] three-byte opcodes of the form 0f 38 __
   /// [3] three-byte opcodes of the form 0f 3a __
-  ContextDecision* Tables[4];
+  /// [4] three-byte opcodes of the form 0f a6 __
+  /// [5] three-byte opcodes of the form 0f a7 __
+  ContextDecision* Tables[6];
   
   /// The instruction information table
   std::vector<InstructionSpecifier> InstructionSpecifiers;
@@ -77,7 +79,7 @@ private:
   ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
   ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.  
   ///   nnnn is the number of a table for looking up these values.  The tables
-  ///   are writen separately so that tables consisting entirely of zeros will
+  ///   are written separately so that tables consisting entirely of zeros will
   ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table
   ///   is printed as:
   ///   
@@ -141,8 +143,9 @@ private:
   ///     }
   ///   }
   ///
-  ///   NAME is the name of the ContextDecision (typically one of the four names 
-  ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and THREEBYTE3A_SYM from
+  ///   NAME is the name of the ContextDecision (typically one of the four names
+  ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
+  ///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM from
   ///   X86DisassemblerDecoderCommon.h).
   ///   IC is one of the contexts in InstructionContext.  There is an opcode
   ///   decision for each possible context.
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index b0839c3..f7518a9 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -68,7 +68,7 @@ namespace X86Local {
     DC = 7, DD = 8, DE = 9, DF = 10,
     XD = 11,  XS = 12,
     T8 = 13,  P_TA = 14,
-    P_0F_AE = 16, P_0F_01 = 17
+    A6 = 15,  A7 = 16
   };
 }
 
@@ -214,7 +214,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   
   HasOpSizePrefix  = Rec->getValueAsBit("hasOpSizePrefix");
   HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix");
+  HasVEXPrefix     = Rec->getValueAsBit("hasVEXPrefix");
   HasVEX_4VPrefix  = Rec->getValueAsBit("hasVEX_4VPrefix");
+  HasVEX_WPrefix   = Rec->getValueAsBit("hasVEX_WPrefix");
   HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix");
   IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
   
@@ -224,7 +226,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   Operands = &insn.Operands.OperandList;
   
   IsSSE            = HasOpSizePrefix && (Name.find("16") == Name.npos);
-  HasFROperands    = false;
+  HasFROperands    = hasFROperands();
+  HasVEX_LPrefix   = has256BitOperands() || Rec->getValueAsBit("hasVEX_L");
   
   ShouldBeEmitted  = true;
 }
@@ -248,7 +251,32 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
 InstructionContext RecognizableInstr::insnContext() const {
   InstructionContext insnContext;
 
-  if (Name.find("64") != Name.npos || HasREX_WPrefix) {
+  if (HasVEX_4VPrefix || HasVEXPrefix) {
+    if (HasOpSizePrefix && HasVEX_LPrefix)
+      insnContext = IC_VEX_L_OPSIZE;
+    else if (HasOpSizePrefix && HasVEX_WPrefix)
+      insnContext = IC_VEX_W_OPSIZE;
+    else if (HasOpSizePrefix)
+      insnContext = IC_VEX_OPSIZE;
+    else if (HasVEX_LPrefix && Prefix == X86Local::XS)
+      insnContext = IC_VEX_L_XS;
+    else if (HasVEX_LPrefix && Prefix == X86Local::XD)
+      insnContext = IC_VEX_L_XD;
+    else if (HasVEX_WPrefix && Prefix == X86Local::XS)
+      insnContext = IC_VEX_W_XS;
+    else if (HasVEX_WPrefix && Prefix == X86Local::XD)
+      insnContext = IC_VEX_W_XD;
+    else if (HasVEX_WPrefix)
+      insnContext = IC_VEX_W;
+    else if (HasVEX_LPrefix)
+      insnContext = IC_VEX_L;
+    else if (Prefix == X86Local::XD)
+      insnContext = IC_VEX_XD;
+    else if (Prefix == X86Local::XS)
+      insnContext = IC_VEX_XS;
+    else
+      insnContext = IC_VEX;
+  } else if (Name.find("64") != Name.npos || HasREX_WPrefix) {
     if (HasREX_WPrefix && HasOpSizePrefix)
       insnContext = IC_64BIT_REXW_OPSIZE;
     else if (HasOpSizePrefix)
@@ -280,6 +308,10 @@ InstructionContext RecognizableInstr::insnContext() const {
 }
   
 RecognizableInstr::filter_ret RecognizableInstr::filter() const {
+  ///////////////////
+  // FILTER_STRONG
+  //
+    
   // Filter out intrinsics
   
   if (!Rec->isSubClassOf("X86Inst"))
@@ -291,26 +323,71 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
   
   if (Form == X86Local::MRMInitReg)
     return FILTER_STRONG;
+    
+    
+  // TEMPORARY pending bug fixes
 
-  
+  if (Name.find("VMOVDQU") != Name.npos ||
+      Name.find("VMOVDQA") != Name.npos ||
+      Name.find("VROUND") != Name.npos)
+    return FILTER_STRONG;
+    
+  // Filter out artificial instructions
+    
+  if (Name.find("TAILJMP") != Name.npos    ||
+      Name.find("_Int") != Name.npos       ||
+      Name.find("_int") != Name.npos       ||
+      Name.find("Int_") != Name.npos       ||
+      Name.find("_NOREX") != Name.npos     ||
+      Name.find("_TC") != Name.npos        ||
+      Name.find("EH_RETURN") != Name.npos  ||
+      Name.find("V_SET") != Name.npos      ||
+      Name.find("LOCK_") != Name.npos      ||
+      Name.find("WIN") != Name.npos        ||
+      Name.find("_AVX") != Name.npos       ||
+      Name.find("2SDL") != Name.npos)
+    return FILTER_STRONG;
+
+  // Filter out instructions with segment override prefixes.
+  // They're too messy to handle now and we'll special case them if needed.
+    
+  if (SegOvr)
+    return FILTER_STRONG;
+    
+  // Filter out instructions that can't be printed.
+    
+  if (AsmString.size() == 0)
+    return FILTER_STRONG;
+   
+  // Filter out instructions with subreg operands.
+   
+  if (AsmString.find("subreg") != AsmString.npos)
+    return FILTER_STRONG;
+
+  /////////////////
+  // FILTER_WEAK
+  //
+
+    
   // Filter out instructions with a LOCK prefix;
   //   prefer forms that do not have the prefix
   if (HasLockPrefix)
     return FILTER_WEAK;
-  
-  // Filter out artificial instructions
 
-  if (Name.find("TAILJMP") != Name.npos    ||
-     Name.find("_Int") != Name.npos       ||
-     Name.find("_int") != Name.npos       ||
-     Name.find("Int_") != Name.npos       ||
-     Name.find("_NOREX") != Name.npos     ||
-     Name.find("_TC") != Name.npos     ||
-     Name.find("EH_RETURN") != Name.npos  ||
-     Name.find("V_SET") != Name.npos      ||
-     Name.find("LOCK_") != Name.npos      ||
-     Name.find("WIN") != Name.npos)
-    return FILTER_STRONG;
+  // Filter out alternate forms of AVX instructions
+  if (Name.find("_alt") != Name.npos ||
+      Name.find("XrYr") != Name.npos ||
+      Name.find("r64r") != Name.npos ||
+      Name.find("_64mr") != Name.npos ||
+      Name.find("Xrr") != Name.npos ||
+      Name.find("rr64") != Name.npos)
+    return FILTER_WEAK;
+    
+  if (Name == "VMASKMOVDQU64"  ||
+      Name == "VEXTRACTPSrr64" ||
+      Name == "VMOVQd64rr"     ||
+      Name == "VMOVQs64rr")
+    return FILTER_WEAK;
 
   // Special cases.
 
@@ -339,6 +416,7 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
       Name == "PUSH32i16"         ||
       Name == "PUSH64i16"         ||
       Name == "MOVPQI2QImr"       ||
+      Name == "VMOVPQI2QImr"      ||
       Name == "MOVSDmr"           ||
       Name == "MOVSDrm"           ||
       Name == "MOVSSmr"           ||
@@ -349,22 +427,6 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
       Name == "CRC32r16")
     return FILTER_WEAK;
 
-  // Filter out instructions with segment override prefixes.
-  // They're too messy to handle now and we'll special case them if needed.
-
-  if (SegOvr)
-    return FILTER_STRONG;
-  
-  // Filter out instructions that can't be printed.
-
-  if (AsmString.size() == 0)
-    return FILTER_STRONG;
-  
-  // Filter out instructions with subreg operands.
-  
-  if (AsmString.find("subreg") != AsmString.npos)
-    return FILTER_STRONG;
-
   if (HasFROperands && Name.find("MOV") != Name.npos &&
      ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || 
       (Name.find("to") != Name.npos)))
@@ -372,6 +434,33 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
 
   return FILTER_NORMAL;
 }
+
+bool RecognizableInstr::hasFROperands() const {
+  const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
+  unsigned numOperands = OperandList.size();
+
+  for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
+    const std::string &recName = OperandList[operandIndex].Rec->getName();
+      
+    if (recName.find("FR") != recName.npos)
+      return true;
+  }
+  return false;
+}
+
+bool RecognizableInstr::has256BitOperands() const {
+  const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
+  unsigned numOperands = OperandList.size();
+    
+  for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
+    const std::string &recName = OperandList[operandIndex].Rec->getName();
+       
+    if (!recName.compare("VR256") || !recName.compare("f256mem")) {
+      return true;
+    }
+  }
+  return false;
+}
   
 void RecognizableInstr::handleOperand(
   bool optional,
@@ -395,13 +484,13 @@ void RecognizableInstr::handleOperand(
   }
   
   const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
-  
+
   Spec->operands[operandIndex].encoding = encodingFromString(typeName,
                                                               HasOpSizePrefix);
   Spec->operands[operandIndex].type = typeFromString(typeName, 
-                                                      IsSSE,
-                                                      HasREX_WPrefix,
-                                                      HasOpSizePrefix);
+                                                     IsSSE,
+                                                     HasREX_WPrefix,
+                                                     HasOpSizePrefix);
   
   ++operandIndex;
   ++physicalOperandIndex;
@@ -530,31 +619,45 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   case X86Local::MRMSrcReg:
     // Operand 1 is a register operand in the Reg/Opcode field.
     // Operand 2 is a register operand in the R/M field.
+    // - In AVX, there is a register operand in the VEX.vvvv field here -
     // Operand 3 (optional) is an immediate.
-    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
-           "Unexpected number of operands for MRMSrcRegFrm");
-    HANDLE_OPERAND(roRegister)
-    HANDLE_OPERAND(rmRegister)
 
     if (HasVEX_4VPrefix)
+      assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
+             "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); 
+    else
+      assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+             "Unexpected number of operands for MRMSrcRegFrm");
+  
+    HANDLE_OPERAND(roRegister)
+       
+    if (HasVEX_4VPrefix)
       // FIXME: In AVX, the register below becomes the one encoded
       // in ModRMVEX and the one above the one in the VEX.VVVV field
-      HANDLE_OPTIONAL(rmRegister)
-    else
-      HANDLE_OPTIONAL(immediate)
+      HANDLE_OPERAND(vvvvRegister)
+          
+    HANDLE_OPERAND(rmRegister)
+    HANDLE_OPTIONAL(immediate)
     break;
   case X86Local::MRMSrcMem:
     // Operand 1 is a register operand in the Reg/Opcode field.
     // Operand 2 is a memory operand (possibly SIB-extended)
+    // - In AVX, there is a register operand in the VEX.vvvv field here -
     // Operand 3 (optional) is an immediate.
-    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
-           "Unexpected number of operands for MRMSrcMemFrm");
+    
+    if (HasVEX_4VPrefix)
+      assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
+             "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); 
+    else
+      assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+             "Unexpected number of operands for MRMSrcMemFrm");
+    
     HANDLE_OPERAND(roRegister)
 
     if (HasVEX_4VPrefix)
       // FIXME: In AVX, the register below becomes the one encoded
       // in ModRMVEX and the one above the one in the VEX.VVVV field
-      HANDLE_OPTIONAL(rmRegister)
+      HANDLE_OPERAND(vvvvRegister)
 
     HANDLE_OPERAND(memory)
     HANDLE_OPTIONAL(immediate)
@@ -569,8 +672,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   case X86Local::MRM7r:
     // Operand 1 is a register operand in the R/M field.
     // Operand 2 (optional) is an immediate or relocation.
-    assert(numPhysicalOperands <= 2 &&
-           "Unexpected number of operands for MRMnRFrm");
+    if (HasVEX_4VPrefix)
+      assert(numPhysicalOperands <= 3 &&
+             "Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
+    else
+      assert(numPhysicalOperands <= 2 &&
+             "Unexpected number of operands for MRMnRFrm");
+    if (HasVEX_4VPrefix)
+      HANDLE_OPERAND(vvvvRegister);
     HANDLE_OPTIONAL(rmRegister)
     HANDLE_OPTIONAL(relocation)
     break;
@@ -687,6 +796,22 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
       filter = new DumbFilter();
     opcodeToSet = Opcode;
     break;
+  case X86Local::A6:
+    opcodeType = THREEBYTE_A6;
+    if (needsModRMForDecode(Form))
+      filter = new ModFilter(isRegFormat(Form));
+    else
+      filter = new DumbFilter();
+    opcodeToSet = Opcode;
+    break;
+  case X86Local::A7:
+    opcodeType = THREEBYTE_A7;
+    if (needsModRMForDecode(Form))
+      filter = new ModFilter(isRegFormat(Form));
+    else
+      filter = new DumbFilter();
+    opcodeToSet = Opcode;
+    break;
   case X86Local::D8:
   case X86Local::D9:
   case X86Local::DA:
@@ -854,6 +979,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("ssmem",               TYPE_M32FP)
   TYPE("RST",                 TYPE_ST)
   TYPE("i128mem",             TYPE_M128)
+  TYPE("i256mem",             TYPE_M256)
   TYPE("i64i32imm_pcrel",     TYPE_REL64)
   TYPE("i16imm_pcrel",        TYPE_REL16)
   TYPE("i32imm_pcrel",        TYPE_REL32)
@@ -878,6 +1004,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("offset16",            TYPE_MOFFS16)
   TYPE("offset32",            TYPE_MOFFS32)
   TYPE("offset64",            TYPE_MOFFS64)
+  TYPE("VR256",               TYPE_XMM256)
   errs() << "Unhandled type string " << s << "\n";
   llvm_unreachable("Unhandled type string");
 }
@@ -900,6 +1027,10 @@ OperandEncoding RecognizableInstr::immediateEncodingFromString
   ENCODING("i64i32imm",       ENCODING_ID)
   ENCODING("i64i8imm",        ENCODING_IB)
   ENCODING("i8imm",           ENCODING_IB)
+  // This is not a typo.  Instructions like BLENDVPD put
+  // register IDs in 8-bit immediates nowadays.
+  ENCODING("VR256",           ENCODING_IB)
+  ENCODING("VR128",           ENCODING_IB)
   errs() << "Unhandled immediate encoding " << s << "\n";
   llvm_unreachable("Unhandled immediate encoding");
 }
@@ -915,6 +1046,7 @@ OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
   ENCODING("FR64",            ENCODING_RM)
   ENCODING("FR32",            ENCODING_RM)
   ENCODING("VR64",            ENCODING_RM)
+  ENCODING("VR256",           ENCODING_RM)
   errs() << "Unhandled R/M register encoding " << s << "\n";
   llvm_unreachable("Unhandled R/M register encoding");
 }
@@ -933,10 +1065,22 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString
   ENCODING("SEGMENT_REG",     ENCODING_REG)
   ENCODING("DEBUG_REG",       ENCODING_REG)
   ENCODING("CONTROL_REG",     ENCODING_REG)
+  ENCODING("VR256",           ENCODING_REG)
   errs() << "Unhandled reg/opcode register encoding " << s << "\n";
   llvm_unreachable("Unhandled reg/opcode register encoding");
 }
 
+OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString
+  (const std::string &s,
+   bool hasOpSizePrefix) {
+  ENCODING("FR32",            ENCODING_VVVV)
+  ENCODING("FR64",            ENCODING_VVVV)
+  ENCODING("VR128",           ENCODING_VVVV)
+  ENCODING("VR256",           ENCODING_VVVV)
+  errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
+  llvm_unreachable("Unhandled VEX.vvvv register encoding");
+}
+
 OperandEncoding RecognizableInstr::memoryEncodingFromString
   (const std::string &s,
    bool hasOpSizePrefix) {
@@ -951,6 +1095,7 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString
   ENCODING("f64mem",          ENCODING_RM)
   ENCODING("f32mem",          ENCODING_RM)
   ENCODING("i128mem",         ENCODING_RM)
+  ENCODING("i256mem",         ENCODING_RM)
   ENCODING("f80mem",          ENCODING_RM)
   ENCODING("lea32mem",        ENCODING_RM)
   ENCODING("lea64_32mem",     ENCODING_RM)
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index c043b90..c7ec18c 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -52,8 +52,14 @@ private:
   bool HasOpSizePrefix;
   /// The hasREX_WPrefix field from the record
   bool HasREX_WPrefix;
+  /// The hasVEXPrefix field from the record
+  bool HasVEXPrefix;
   /// The hasVEX_4VPrefix field from the record
   bool HasVEX_4VPrefix;
+  /// The hasVEX_WPrefix field from the record
+  bool HasVEX_WPrefix;
+  /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
+  bool HasVEX_LPrefix;
   /// The hasLockPrefix field from the record
   bool HasLockPrefix;
   /// The isCodeGenOnly filed from the record
@@ -96,7 +102,7 @@ private:
                       // error if it conflcits with any other FILTER_NORMAL
                       // instruction
   };
-  
+      
   /// filter - Determines whether the instruction should be decodable.  Some 
   ///   instructions are pure intrinsics and use unencodable operands; many
   ///   synthetic instructions are duplicates of other instructions; other
@@ -106,6 +112,12 @@ private:
   ///
   /// @return - The degree of filtering to be applied (see filter_ret).
   filter_ret filter() const;
+
+  /// hasFROperands - Returns true if any operand is a FR operand.
+  bool hasFROperands() const;
+  
+  /// has256BitOperands - Returns true if any operand is a 256-bit SSE operand.
+  bool has256BitOperands() const;
   
   /// typeFromString - Translates an operand type from the string provided in
   ///   the LLVM tables to an OperandType for use in the operand specifier.
@@ -155,6 +167,8 @@ private:
                                                       bool hasOpSizePrefix);
   static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
                                                           bool hasOpSizePrefix);
+  static OperandEncoding vvvvRegisterEncodingFromString(const std::string &s,
+                                                        bool HasOpSizePrefix);
   
   /// handleOperand - Converts a single operand from the LLVM table format to
   ///   the emitted table format, handling any duplicate operands it encounters
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 5140e15..470ee76 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -6,7 +6,7 @@
 #
 # You can specify TARGETS=ppc (or i386) on the buildit command line to limit the
 # build to just one target. The default is for ppc and i386. The compiler
-# targetted at this host gets built anyway, but not installed unless it's listed
+# targeted at this host gets built anyway, but not installed unless it's listed
 # in TARGETS.
 
 # Include the set of standard Apple makefile definitions.
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 38b0bfd..5665e4c 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -132,7 +132,7 @@ elif [ "$IOS_SIM_BUILD" = yes ]; then
   configure_opts="--enable-targets=x86 --host=i686-apple-darwin_sim \
                   --build=i686-apple-darwin10"
 else
-  configure_opts="--enable-targets=arm,x86,powerpc,cbe"
+  configure_opts="--enable-targets=arm,x86,cbe"
 fi
 
 if [ \! -f Makefile.config ]; then
@@ -261,7 +261,7 @@ elif [ $MACOSX_DEPLOYMENT_TARGET = "10.5" ]; then
         -exec lipo -extract ppc7400 -extract i386 {} -output {} \;
 else
     find . -perm 755 -type f \! \( -name '*gccas' -o -name '*gccld' -o -name llvm-config \) \
-        -exec lipo -extract ppc7400 -extract i386 -extract x86_64 {} -output {} \;
+        -exec lipo -extract i386 -extract x86_64 {} -output {} \;
 fi
 
 # The Hello dylib is an example of how to build a pass.
diff --git a/utils/lit/lit/ProgressBar.py b/utils/lit/lit/ProgressBar.py
index 85c95f5..5c85a17 100644
--- a/utils/lit/lit/ProgressBar.py
+++ b/utils/lit/lit/ProgressBar.py
@@ -105,6 +105,7 @@ class TerminalController:
         # Look up numeric capabilities.
         self.COLS = curses.tigetnum('cols')
         self.LINES = curses.tigetnum('lines')
+        self.XN = curses.tigetflag('xenl')
         
         # Look up string capabilities.
         for capability in self._STRING_CAPABILITIES:
@@ -205,7 +206,7 @@ class ProgressBar:
     The progress bar is colored, if the terminal supports color
     output; and adjusts to the width of the terminal.
     """
-    BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s\n'
+    BAR = '%s${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}%s'
     HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
         
     def __init__(self, term, header, useETA=True):
@@ -213,7 +214,15 @@ class ProgressBar:
         if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
             raise ValueError("Terminal isn't capable enough -- you "
                              "should use a simpler progress dispaly.")
-        self.width = self.term.COLS or 75
+        self.BOL = self.term.BOL # BoL from col#79
+        self.XNL = "\n" # Newline from col#79
+        if self.term.COLS:
+            self.width = self.term.COLS
+            if not self.term.XN:
+                self.BOL = self.term.UP + self.term.BOL
+                self.XNL = "" # Cursor must be fed to the next line
+        else:
+            self.width = 75
         self.bar = term.render(self.BAR)
         self.header = self.term.render(self.HEADER % header.center(self.width))
         self.cleared = 1 #: true if we haven't drawn the bar yet.
@@ -244,15 +253,19 @@ class ProgressBar:
         else:
             message = '... ' + message[-(self.width-4):]
         sys.stdout.write(
-            self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
+            self.BOL + self.term.UP + self.term.CLEAR_EOL +
             (self.bar % (prefix, '='*n, '-'*(barWidth-n), suffix)) +
+            self.XNL +
             self.term.CLEAR_EOL + message)
+        if not self.term.XN:
+            sys.stdout.flush()
 
     def clear(self):
         if not self.cleared:
-            sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
+            sys.stdout.write(self.BOL + self.term.CLEAR_EOL +
                              self.term.UP + self.term.CLEAR_EOL +
                              self.term.UP + self.term.CLEAR_EOL)
+            sys.stdout.flush()
             self.cleared = 1
 
 def test():
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index dba7814..80d0ba1 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -337,23 +337,28 @@ def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
     return out, err, exitCode
 
 def executeScript(test, litConfig, tmpBase, commands, cwd):
+    bashPath = litConfig.getBashPath();
+    isWin32CMDEXE = (litConfig.isWindows and not bashPath)
     script = tmpBase + '.script'
-    if litConfig.isWindows:
+    if isWin32CMDEXE:
         script += '.bat'
 
     # Write script file
     f = open(script,'w')
-    if litConfig.isWindows:
+    if isWin32CMDEXE:
         f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
     else:
         f.write(' &&\n'.join(commands))
     f.write('\n')
     f.close()
 
-    if litConfig.isWindows:
+    if isWin32CMDEXE:
         command = ['cmd','/c', script]
     else:
-        command = ['/bin/sh', script]
+        if bashPath:
+            command = [bashPath, script]
+        else:
+            command = ['/bin/sh', script]
         if litConfig.useValgrind:
             # FIXME: Running valgrind on sh is overkill. We probably could just
             # run on clang with no real loss.
@@ -553,7 +558,7 @@ def executeShTest(test, litConfig, useExternalSh):
     if test.config.unsupported:
         return (Test.UNSUPPORTED, 'Test is unsupported')
 
-    res = parseIntegratedTestScript(test)
+    res = parseIntegratedTestScript(test, useExternalSh)
     if len(res) == 2:
         return res
 
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 0d9bc00..25bb341 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -17,7 +17,7 @@ class TestingConfig:
                 'PATHEXT' : os.environ.get('PATHEXT',''),
                 'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
                 'LLVM_DISABLE_CRT_DEBUG' : '1',
-                'PRINTF_EXPONENT_DIGITS' : '2',
+                'PYTHONUNBUFFERED' : '1',
                 }
 
             config = TestingConfig(parent,
diff --git a/utils/lit/setup.py b/utils/lit/setup.py
index 738ee23..a94e6ea 100644
--- a/utils/lit/setup.py
+++ b/utils/lit/setup.py
@@ -38,7 +38,7 @@ Features
 Documentation
 =============
 
-The offical *lit* documentation is in the man page, available online at the LLVM
+The official *lit* documentation is in the man page, available online at the LLVM
 Command Guide: http://llvm.org/cmds/lit.html.
 
 
diff --git a/utils/llvm-lit/Makefile b/utils/llvm-lit/Makefile
index 702591f..77021bb 100644
--- a/utils/llvm-lit/Makefile
+++ b/utils/llvm-lit/Makefile
@@ -13,9 +13,10 @@ include $(LEVEL)/Makefile.common
 
 all:: $(ToolDir)/llvm-lit
 
-$(ToolDir)/llvm-lit: llvm-lit.in $(ToolDir)/.dir
+$(ToolDir)/llvm-lit: llvm-lit.in Makefile $(ToolDir)/.dir
 	$(Echo) "Creating 'llvm-lit' script..."
-	$(Verb)sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
-	     -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
-	     $< > $@
+	$(Verb)$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp
+	$(Verb)$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
+	$(Verb)sed -f lit.tmp $< > $@
 	$(Verb)chmod +x $@
+	$(Verb)rm -f lit.tmp
diff --git a/utils/llvmbuild b/utils/llvmbuild
index fb8500d..5912c50 100755
--- a/utils/llvmbuild
+++ b/utils/llvmbuild
@@ -650,7 +650,8 @@ class Builder(threading.Thread):
 
 
     def configure(self, component, srcdir, builddir, flags, env):
-        self.logger.debug("Configure " + str(flags))
+        self.logger.debug("Configure " + str(flags) + " " + str(srcdir) + " -> "
+                          + str(builddir))
 
         configure_files = dict(
             llvm=[(srcdir + "/configure", builddir + "/Makefile")],
@@ -721,8 +722,16 @@ branch_abbrev = get_path_abbrevs(set(options.branch))
 
 work_queue = queue.Queue()
 
-for t in range(options.threads):
-    jobs = options.jobs // options.threads
+jobs = options.jobs // options.threads
+if jobs == 0:
+    jobs = 1
+
+numthreads = options.threads
+if jobs < numthreads:
+    numthreads = jobs
+    jobs = 1
+
+for t in range(numthreads):
     builder = Builder(work_queue, jobs,
                       build_abbrev, source_abbrev, branch_abbrev,
                       options)
diff --git a/utils/profile.pl b/utils/profile.pl
index 3180115..782e5dc 100755
--- a/utils/profile.pl
+++ b/utils/profile.pl
@@ -65,7 +65,7 @@ shift @ARGV;
 my $libdir = `llvm-config --libdir`;
 chomp $libdir;
 
-my $LibProfPath = $libdir . "/profile_rt.so";
+my $LibProfPath = $libdir . "/libprofile_rt.so";
 
 system "opt -q -f $ProfilePass $BytecodeFile -o $BytecodeFile.inst";
 system "lli -fake-argv0 '$BytecodeFile' -load $LibProfPath " .
diff --git a/utils/release/findRegressions.py b/utils/release/findRegressions.py
new file mode 100755
index 0000000..e801dab
--- /dev/null
+++ b/utils/release/findRegressions.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+import re, string, sys, os, time
+
+DEBUG = 0
+testDirName = 'llvm-test'
+test      = ['compile', 'llc', 'jit', 'cbe']
+exectime     = ['llc-time', 'jit-time', 'cbe-time',]
+comptime     = ['llc', 'jit-comptime', 'compile']
+
+(tp, exp) = ('compileTime_', 'executeTime_')
+
+def parse(file):
+  f=open(file, 'r')
+  d = f.read()
+  
+  #Cleanup weird stuff
+  d = re.sub(r',\d+:\d','', d)
+   
+  r = re.findall(r'TEST-(PASS|FAIL|RESULT.*?):\s+(.*?)\s+(.*?)\r*\n', d)
+   
+  test = {}
+  fname = ''
+  for t in r:
+    if DEBUG:
+      print t
+    if t[0] == 'PASS' or t[0] == 'FAIL' :
+      tmp = t[2].split(testDirName)
+      
+      if DEBUG:
+        print tmp
+      
+      if len(tmp) == 2:
+        fname = tmp[1].strip('\r\n')
+      else:
+        fname = tmp[0].strip('\r\n')
+      
+      if not test.has_key(fname) :
+        test[fname] = {}
+      
+      for k in test:
+        test[fname][k] = 'NA'
+        test[fname][t[1]] = t[0]
+        if DEBUG:
+          print test[fname][t[1]]
+    else :
+      try:
+        n = t[0].split('RESULT-')[1]
+        
+        if DEBUG:
+          print n;
+        
+        if n == 'llc' or n == 'jit-comptime' or n == 'compile':
+          test[fname][tp + n] = float(t[2].split(' ')[2])
+          if DEBUG:
+            print test[fname][tp + n]
+        
+        elif n.endswith('-time') :
+            test[fname][exp + n] = float(t[2].strip('\r\n'))
+            if DEBUG:
+              print test[fname][exp + n]
+        
+        else :
+          print "ERROR!"
+          sys.exit(1)
+      
+      except:
+          continue
+
+  return test
+
+# Diff results and look for regressions.
+def diffResults(d_old, d_new):
+
+  for t in sorted(d_old.keys()) :
+    if DEBUG:
+      print t
+        
+    if d_new.has_key(t) :
+    
+      # Check if the test passed or failed.
+      for x in test:
+        if d_old[t].has_key(x):
+          if d_new[t].has_key(x):
+            if d_old[t][x] == 'PASS':
+              if d_new[t][x] != 'PASS':
+                print t + " *** REGRESSION (" + x + ")\n"
+            else:
+              if d_new[t][x] == 'PASS':
+                print t + " * NEW PASS (" + x + ")\n"
+                
+          else :
+            print t + "*** REGRESSION (" + x + ")\n"
+        
+        # For execution time, if there is no result, its a fail.
+        for x in exectime:
+          if d_old[t].has_key(tp + x):
+            if not d_new[t].has_key(tp + x):
+              print t + " *** REGRESSION (" + tp + x + ")\n"
+                
+          else :
+            if d_new[t].has_key(tp + x):
+              print t + " * NEW PASS (" + tp + x + ")\n"
+
+       
+        for x in comptime:
+          if d_old[t].has_key(exp + x):
+            if not d_new[t].has_key(exp + x):
+              print t + " *** REGRESSION (" + exp + x + ")\n"
+                
+          else :
+            if d_new[t].has_key(exp + x):
+              print t + " * NEW PASS (" + exp + x + ")\n"
+              
+    else :
+      print t + ": Removed from test-suite.\n"
+    
+
+#Main
+if len(sys.argv) < 3 :
+    print 'Usage:', sys.argv[0], \
+          '<old log> <new log>'
+    sys.exit(-1)
+
+d_old = parse(sys.argv[1])
+d_new = parse(sys.argv[2])
+
+
+diffResults(d_old, d_new)
+
+
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index e24638e..21d7fee 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -12,7 +12,7 @@
 #
 #===------------------------------------------------------------------------===#
 
-set -e
+set -e                          # Exit if any command fails
 
 Release=""
 Release_no_dot=""
@@ -33,9 +33,9 @@ function usage() {
     echo " -build-dir DIR    Directory to perform testing in. [default: pwd]"
     echo " -no-checkout      Don't checkout the sources from SVN."
     echo " -no-64bit         Don't test the 64-bit version. [default: yes]"
-    echo " -ada              Build Ada. [default: no]"
-    echo " -disable-objc     Disable ObjC build. [default: build]"
-    echo " -disable-fortran  Disable Fortran build. [default: build]"
+    echo " -enable-ada       Build Ada. [default: disable]"
+    echo " -disable-objc     Disable ObjC build. [default: enable]"
+    echo " -disable-fortran  Disable Fortran build. [default: enable]"
 }
 
 while [ $# -gt 0 ]; do
@@ -66,7 +66,7 @@ while [ $# -gt 0 ]; do
         -no-64bit | --no-64bit )
             do_64bit="no"
             ;;
-        -ada | --ada )
+        -enable-ada | --enable-ada )
             do_ada="yes"
             ;;
         -disable-objc | --disable-objc )
diff --git a/utils/show-diagnostics b/utils/show-diagnostics
new file mode 100755
index 0000000..3a69793
--- /dev/null
+++ b/utils/show-diagnostics
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+import plistlib
+
+def main():
+    from optparse import OptionParser, OptionGroup
+    parser = OptionParser("""\
+usage: %prog [options] <path>
+
+Utility for dumping Clang-style logged diagnostics.\
+""")
+    (opts, args) = parser.parse_args()
+
+    if len(args) != 1:
+        parser.error("invalid number of arguments")
+
+    path, = args
+
+    # Read the diagnostics log.
+    f = open(path)
+    try:
+        data = f.read()
+    finally:
+        f.close()
+
+    # Complete the plist (the log itself is just the chunks).
+    data = """\
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" \
+                       "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+%s
+</array>
+</plist>""" % data
+
+    # Load the diagnostics.
+    diags = plistlib.readPlistFromString(data)
+
+    # Print out the diagnostics.
+    print
+    print "**** BUILD DIAGNOSTICS ****"
+    for i, file_diags in enumerate(diags):
+        file = file_diags.get('main-file')
+        print "*** %s ***" % file
+        for d in file_diags.get('diagnostics', ()):
+            print "%s:%s:%s: %s: %s" % (
+                d.get('filename'), d.get('line'), d.get('column'),
+                d.get('level'), d.get('message'))
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/unittest/UnitTestMain/Makefile b/utils/unittest/UnitTestMain/Makefile
index cec654f..3082779 100644
--- a/utils/unittest/UnitTestMain/Makefile
+++ b/utils/unittest/UnitTestMain/Makefile
@@ -27,4 +27,6 @@ ifneq ($(HAVE_PTHREAD), 1)
   CPP.Flags += -DGTEST_HAS_PTHREAD=0
 endif
 
+NO_INSTALL = 1
+
 include $(LEVEL)/Makefile.common
diff --git a/utils/unittest/googletest/gtest-filepath.cc b/utils/unittest/googletest/gtest-filepath.cc
index c1ef918..8d1d67e 100644
--- a/utils/unittest/googletest/gtest-filepath.cc
+++ b/utils/unittest/googletest/gtest-filepath.cc
@@ -123,7 +123,7 @@ FilePath FilePath::RemoveExtension(const char* extension) const {
   return *this;
 }
 
-// Returns a pointer to the last occurence of a valid path separator in
+// Returns a pointer to the last occurrence of a valid path separator in
 // the FilePath. On Windows, for example, both '/' and '\' are valid path
 // separators. Returns NULL if no path separator was found.
 const char* FilePath::FindLastPathSeparator() const {
diff --git a/utils/unittest/googletest/gtest.cc b/utils/unittest/googletest/gtest.cc
index 51732af..9aa5441 100644
--- a/utils/unittest/googletest/gtest.cc
+++ b/utils/unittest/googletest/gtest.cc
@@ -1415,7 +1415,7 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
 // Utility functions for encoding Unicode text (wide strings) in
 // UTF-8.
 
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
 // like this:
 //
 // Code-point length   Encoding
diff --git a/utils/unittest/googletest/include/gtest/gtest.h b/utils/unittest/googletest/include/gtest/gtest.h
index 921fad1..5470082 100644
--- a/utils/unittest/googletest/include/gtest/gtest.h
+++ b/utils/unittest/googletest/include/gtest/gtest.h
@@ -1258,6 +1258,8 @@ AssertionResult CmpHelperEQ(const char* expected_expression,
 #pragma warning(push)          // Saves the current warning state.
 #pragma warning(disable:4389)  // Temporarily disables warning on
                                // signed/unsigned mismatch.
+#pragma warning(disable:4805)  // Temporarily disables warning on
+                               // unsafe mix of types
 #endif
 
   if (expected == actual) {
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h b/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
index 4b76d79..efbc176 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
@@ -196,7 +196,7 @@ class GTEST_API_ FilePath {
 
   void Normalize();
 
-  // Returns a pointer to the last occurence of a valid path separator in
+  // Returns a pointer to the last occurrence of a valid path separator in
   // the FilePath. On Windows, for example, both '/' and '\' are valid path
   // separators. Returns NULL if no path separator was found.
   const char* FindLastPathSeparator() const;
author	dim <dim@FreeBSD.org>	2011-05-02 19:34:44 +0000
committer	dim <dim@FreeBSD.org>	2011-05-02 19:34:44 +0000
commit	2b066988909948dc3d53d01760bc2d71d32f3feb (patch)
tree	fc5f365fb9035b2d0c622bbf06c9bbe8627d7279
parent	c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc (diff)
download	FreeBSD-src-2b066988909948dc3d53d01760bc2d71d32f3feb.zip FreeBSD-src-2b066988909948dc3d53d01760bc2d71d32f3feb.tar.gz